pion  5.0.6
algorithm.cpp
1 // ---------------------------------------------------------------------
2 // pion: a Boost C++ framework for building lightweight HTTP interfaces
3 // ---------------------------------------------------------------------
4 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion)
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See http://www.boost.org/LICENSE_1_0.txt
8 //
9 
10 #include <cmath>
11 #include <cstdlib>
12 #include <cstdio>
13 #include <cstring>
14 #include <pion/algorithm.hpp>
15 #include <boost/assert.hpp>
16 
17 // macro to shift bitmask by a single bit
18 #define SHIFT_BITMASK(ptr, mask) if (mask & 0x01) { mask = 0x80; ++ptr; } else mask >>= 1;
19 
20 
21 namespace pion { // begin namespace pion
22 
23 
24 bool algorithm::base64_decode(const std::string &input, std::string &output)
25 {
26  static const char nop = -1;
27  static const char decoding_data[] = {
28  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
29  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
30  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop, 62, nop,nop,nop, 63,
31  52, 53, 54, 55, 56, 57, 58, 59, 60, 61,nop,nop, nop,nop,nop,nop,
32  nop, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
33  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,nop, nop,nop,nop,nop,
34  nop,26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
35  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,nop, nop,nop,nop,nop,
36  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
37  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
38  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
39  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
40  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
41  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
42  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
43  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop
44  };
45 
46  unsigned int input_length=input.size();
47  const char * input_ptr = input.data();
48 
49  // allocate space for output string
50  output.clear();
51  output.reserve(((input_length+2)/3)*4);
52 
53  // for each 4-bytes sequence from the input, extract 4 6-bits sequences by droping first two bits
54  // and regenerate into 3 8-bits sequence
55 
56  for (unsigned int i=0; i<input_length;i++) {
57  char base64code0;
58  char base64code1;
59  char base64code2 = 0; // initialized to 0 to suppress warnings
60  char base64code3;
61 
62  base64code0 = decoding_data[static_cast<int>(input_ptr[i])];
63  if(base64code0==nop) // non base64 character
64  return false;
65  if(!(++i<input_length)) // we need at least two input bytes for first byte output
66  return false;
67  base64code1 = decoding_data[static_cast<int>(input_ptr[i])];
68  if(base64code1==nop) // non base64 character
69  return false;
70 
71  output += ((base64code0 << 2) | ((base64code1 >> 4) & 0x3));
72 
73  if(++i<input_length) {
74  char c = input_ptr[i];
75  if(c =='=') { // padding , end of input
76  BOOST_ASSERT( (base64code1 & 0x0f)==0);
77  return true;
78  }
79  base64code2 = decoding_data[static_cast<int>(input_ptr[i])];
80  if(base64code2==nop) // non base64 character
81  return false;
82 
83  output += ((base64code1 << 4) & 0xf0) | ((base64code2 >> 2) & 0x0f);
84  }
85 
86  if(++i<input_length) {
87  char c = input_ptr[i];
88  if(c =='=') { // padding , end of input
89  BOOST_ASSERT( (base64code2 & 0x03)==0);
90  return true;
91  }
92  base64code3 = decoding_data[static_cast<int>(input_ptr[i])];
93  if(base64code3==nop) // non base64 character
94  return false;
95 
96  output += (((base64code2 << 6) & 0xc0) | base64code3 );
97  }
98 
99  }
100 
101  return true;
102 }
103 
104 bool algorithm::base64_encode(const std::string &input, std::string &output)
105 {
106  static const char encoding_data[] =
107  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
108 
109  unsigned int input_length=input.size();
110  const char * input_ptr = input.data();
111 
112  // allocate space for output string
113  output.clear();
114  output.reserve(((input_length+2)/3)*4);
115 
116  // for each 3-bytes sequence from the input, extract 4 6-bits sequences and encode using
117  // encoding_data lookup table.
118  // if input do not contains enough chars to complete 3-byte sequence,use pad char '='
119  for (unsigned int i=0; i<input_length;i++) {
120  int base64code0=0;
121  int base64code1=0;
122  int base64code2=0;
123  int base64code3=0;
124 
125  base64code0 = (input_ptr[i] >> 2) & 0x3f; // 1-byte 6 bits
126  output += encoding_data[base64code0];
127  base64code1 = (input_ptr[i] << 4 ) & 0x3f; // 1-byte 2 bits +
128 
129  if (++i < input_length) {
130  base64code1 |= (input_ptr[i] >> 4) & 0x0f; // 2-byte 4 bits
131  output += encoding_data[base64code1];
132  base64code2 = (input_ptr[i] << 2) & 0x3f; // 2-byte 4 bits +
133 
134  if (++i < input_length) {
135  base64code2 |= (input_ptr[i] >> 6) & 0x03; // 3-byte 2 bits
136  base64code3 = input_ptr[i] & 0x3f; // 3-byte 6 bits
137  output += encoding_data[base64code2];
138  output += encoding_data[base64code3];
139  } else {
140  output += encoding_data[base64code2];
141  output += '=';
142  }
143  } else {
144  output += encoding_data[base64code1];
145  output += '=';
146  output += '=';
147  }
148  }
149 
150  return true;
151 }
152 
153 std::string algorithm::url_decode(const std::string& str)
154 {
155  char decode_buf[3];
156  std::string result;
157  result.reserve(str.size());
158 
159  for (std::string::size_type pos = 0; pos < str.size(); ++pos) {
160  switch(str[pos]) {
161  case '+':
162  // convert to space character
163  result += ' ';
164  break;
165  case '%':
166  // decode hexidecimal value
167  if (pos + 2 < str.size()) {
168  decode_buf[0] = str[++pos];
169  decode_buf[1] = str[++pos];
170  decode_buf[2] = '\0';
171  result += static_cast<char>( strtol(decode_buf, 0, 16) );
172  } else {
173  // recover from error by not decoding character
174  result += '%';
175  }
176  break;
177  default:
178  // character does not need to be escaped
179  result += str[pos];
180  }
181  };
182 
183  return result;
184 }
185 
186 std::string algorithm::url_encode(const std::string& str)
187 {
188  char encode_buf[4];
189  std::string result;
190  encode_buf[0] = '%';
191  result.reserve(str.size());
192 
193  // character selection for this algorithm is based on the following url:
194  // http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
195 
196  for (std::string::size_type pos = 0; pos < str.size(); ++pos) {
197  switch(str[pos]) {
198  default:
199  if (str[pos] > 32 && str[pos] < 127) {
200  // character does not need to be escaped
201  result += str[pos];
202  break;
203  }
204  // else pass through to next case
205  case ' ':
206  case '$': case '&': case '+': case ',': case '/': case ':':
207  case ';': case '=': case '?': case '@': case '"': case '<':
208  case '>': case '#': case '%': case '{': case '}': case '|':
209  case '\\': case '^': case '~': case '[': case ']': case '`':
210  // the character needs to be encoded
211  sprintf(encode_buf+1, "%.2X", (unsigned char)(str[pos]));
212  result += encode_buf;
213  break;
214  }
215  };
216 
217  return result;
218 }
219 
220 // TODO
221 //std::string algorithm::xml_decode(const std::string& str)
222 //{
223 //}
224 
225 std::string algorithm::xml_encode(const std::string& str)
226 {
227  std::string result;
228  result.reserve(str.size() + 20); // Assume ~5 characters converted (length increases)
229  const unsigned char *ptr = reinterpret_cast<const unsigned char*>(str.c_str());
230  const unsigned char *end_ptr = ptr + str.size();
231  while (ptr < end_ptr) {
232  // check byte ranges for valid UTF-8
233  // see http://en.wikipedia.org/wiki/UTF-8
234  // also, see http://www.w3.org/TR/REC-xml/#charsets
235  // this implementation is the strictest subset of both
236  if ((*ptr >= 0x20 && *ptr <= 0x7F) || *ptr == 0x9 || *ptr == 0xa || *ptr == 0xd) {
237  // regular ASCII character
238  switch(*ptr) {
239  // Escape special XML characters.
240  case '&':
241  result += "&amp;";
242  break;
243  case '<':
244  result += "&lt;";
245  break;
246  case '>':
247  result += "&gt;";
248  break;
249  case '\"':
250  result += "&quot;";
251  break;
252  case '\'':
253  result += "&apos;";
254  break;
255  default:
256  result += *ptr;
257  }
258  } else if (*ptr >= 0xC2 && *ptr <= 0xDF) {
259  // two-byte sequence
260  if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF) {
261  result += *ptr;
262  result += *(++ptr);
263  } else {
264  // insert replacement char
265  result += 0xef;
266  result += 0xbf;
267  result += 0xbd;
268  }
269  } else if (*ptr >= 0xE0 && *ptr <= 0xEF) {
270  // three-byte sequence
271  if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF
272  && *(ptr+2) >= 0x80 && *(ptr+2) <= 0xBF) {
273  result += *ptr;
274  result += *(++ptr);
275  result += *(++ptr);
276  } else {
277  // insert replacement char
278  result += 0xef;
279  result += 0xbf;
280  result += 0xbd;
281  }
282  } else if (*ptr >= 0xF0 && *ptr <= 0xF4) {
283  // four-byte sequence
284  if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF
285  && *(ptr+2) >= 0x80 && *(ptr+2) <= 0xBF
286  && *(ptr+3) >= 0x80 && *(ptr+3) <= 0xBF) {
287  result += *ptr;
288  result += *(++ptr);
289  result += *(++ptr);
290  result += *(++ptr);
291  } else {
292  // insert replacement char
293  result += 0xef;
294  result += 0xbf;
295  result += 0xbd;
296  }
297  } else {
298  // insert replacement char
299  result += 0xef;
300  result += 0xbf;
301  result += 0xbd;
302  }
303  ++ptr;
304  }
305 
306  return result;
307 }
308 
309 void algorithm::float_from_bytes(long double& value, const unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits)
310 {
311  // get sign of the number from the first bit
312  const int value_sign = (*ptr & 0x80) ? -1 : 1;
313 
314  // build exponent value from bitstream
315  unsigned char mask = 0x80;
316  boost::int16_t exponent = 0;
317  for (size_t n = 0; n < num_exp_bits; ++n) {
318  SHIFT_BITMASK(ptr, mask);
319  exponent *= 2;
320  if (*ptr & mask)
321  exponent += 1;
322  }
323 
324  // build significand from bitstream
325  long double significand = exponent ? 1.0 : 0.0;
326  long double significand_value = 1.0;
327  while (num_fraction_bits) {
328  SHIFT_BITMASK(ptr, mask);
329  significand_value /= 2;
330  if (*ptr & mask)
331  significand += significand_value;
332  --num_fraction_bits;
333  }
334 
335  // calculate final value
336  exponent -= (::pow((long double)2, (int)(num_exp_bits - 1)) - 1);
337  value = value_sign * significand * ::pow((long double)2, exponent);
338 }
339 
340 void algorithm::float_to_bytes(long double value, unsigned char *buf, size_t num_exp_bits, size_t num_fraction_bits)
341 {
342  // first initialize output buffer to zeros
343  unsigned char *ptr = buf;
344  memset(ptr, 0x00, ::ceil(static_cast<float>(num_exp_bits + num_fraction_bits + 1) / 8));
345 
346  // initialize first byte starting with sign of number
347  if (value < 0) {
348  *ptr = 0x80;
349  value *= -1;
350  }
351 
352  // break down numbers >= 1.0 by incrementing the exponent & dividing by 2
353  boost::int16_t exponent = 0;
354  while (value >= 1) {
355  value /= 2;
356  ++exponent;
357  }
358 
359  // skip past exponent bits because we don't know the value yet
360  unsigned char mask = 0x40;
361  for (size_t n = num_exp_bits; n > 0; --n) {
362  if (n >= 8) {
363  ++ptr;
364  n -= 7;
365  } else {
366  SHIFT_BITMASK(ptr, mask);
367  }
368  }
369 
370  // serialize fractional value < 1.0
371  bool got_exponent = false;
372  boost::uint16_t num_bits = 0;
373  while (value && num_bits < num_fraction_bits) {
374  value *= 2;
375  if (got_exponent) {
376  if (value >= 1.0) {
377  *ptr |= mask;
378  value -= 1.0;
379  }
380  SHIFT_BITMASK(ptr, mask);
381  ++num_bits;
382  } else {
383  --exponent;
384  if (value >= 1.0) {
385  value -= 1.0;
386  got_exponent = true;
387  }
388  }
389  }
390 
391  // normalize exponent.
392  // note: we should have a zero exponent if value == 0
393  boost::int32_t high_bit = ::pow((long double)2, (int)(num_exp_bits - 1));
394  if (got_exponent)
395  exponent += (high_bit - 1);
396  else
397  exponent = 0;
398 
399  // serialize exponent bits
400  ptr = buf;
401  mask = 0x80;
402  for (size_t n = 0; n < num_exp_bits; ++n) {
403  SHIFT_BITMASK(ptr, mask);
404  if (exponent >= high_bit) {
405  *ptr |= mask;
406  exponent -= high_bit;
407  }
408  high_bit /= 2;
409  }
410 }
411 
412 } // end namespace pion
static void float_to_bytes(long double value, unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits)
Definition: algorithm.cpp:340
static std::string url_encode(const std::string &str)
encodes strings so that they are safe for URLs (with%20spaces)
Definition: algorithm.cpp:186
static bool base64_decode(std::string const &input, std::string &output)
Definition: algorithm.cpp:24
static std::string xml_encode(const std::string &str)
TODO: escapes XML/HTML-encoded strings (1 < 2)
Definition: algorithm.cpp:225
static std::string url_decode(const std::string &str)
escapes URL-encoded strings (a%20value+with%20spaces)
Definition: algorithm.cpp:153
static bool base64_encode(std::string const &input, std::string &output)
Definition: algorithm.cpp:104
static void float_from_bytes(long double &value, const unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits)
Definition: algorithm.cpp:309