tesseract  4.1.0
genericvector.h
Go to the documentation of this file.
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 //
6 // (C) Copyright 2007, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 //
19 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
20 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
21 
22 #include <algorithm>
23 #include <cassert>
24 #include <cstdio>
25 #include <cstdlib>
26 
27 #include "helpers.h"
28 #include "serialis.h"
29 #include "strngs.h"
30 #include "tesscallback.h"
31 
32 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
33 // provides automatic deletion of pointers, [De]Serialize that works, and
34 // sort that works.
35 template <typename T>
36 class GenericVector {
37  public:
40  }
41  GenericVector(int size, const T& init_val) {
42  init(size);
43  init_to_size(size, init_val);
44  }
45 
46  // Copy
47  GenericVector(const GenericVector& other) {
48  this->init(other.size());
49  this->operator+=(other);
50  }
53 
55 
56  // Reserve some memory.
57  void reserve(int size);
58  // Double the size of the internal array.
59  void double_the_size();
60 
61  // Resizes to size and sets all values to t.
62  void init_to_size(int size, const T& t);
63  // Resizes to size without any initialization.
64  void resize_no_init(int size) {
65  reserve(size);
66  size_used_ = size;
67  }
68 
69  // Return the size used.
70  int size() const {
71  return size_used_;
72  }
73  // Workaround to avoid g++ -Wsign-compare warnings.
74  size_t unsigned_size() const {
75  static_assert(sizeof(size_used_) <= sizeof(size_t),
76  "Wow! sizeof(size_t) < sizeof(int32_t)!!");
77  assert(0 <= size_used_);
78  return static_cast<size_t>(size_used_);
79  }
80  int size_reserved() const {
81  return size_reserved_;
82  }
83 
84  int length() const {
85  return size_used_;
86  }
87 
88  // Return true if empty.
89  bool empty() const {
90  return size_used_ == 0;
91  }
92 
93  // Return the object from an index.
94  T& get(int index) const;
95  T& back() const;
96  T& operator[](int index) const;
97  // Returns the last object and removes it.
98  T pop_back();
99 
100  // Return the index of the T object.
101  // This method NEEDS a compare_callback to be passed to
102  // set_compare_callback.
103  int get_index(const T& object) const;
104 
105  // Return true if T is in the array
106  bool contains(const T& object) const;
107 
108  // Return true if the index is valid
109  T contains_index(int index) const;
110 
111  // Push an element in the end of the array
112  int push_back(T object);
113  void operator+=(const T& t);
114 
115  // Push an element in the end of the array if the same
116  // element is not already contained in the array.
117  int push_back_new(const T& object);
118 
119  // Push an element in the front of the array
120  // Note: This function is O(n)
121  int push_front(const T& object);
122 
123  // Set the value at the given index
124  void set(const T& t, int index);
125 
126  // Insert t at the given index, push other elements to the right.
127  void insert(const T& t, int index);
128 
129  // Removes an element at the given index and
130  // shifts the remaining elements to the left.
131  void remove(int index);
132 
133  // Truncates the array to the given size by removing the end.
134  // If the current size is less, the array is not expanded.
135  void truncate(int size) {
136  if (size < size_used_) {
137  size_used_ = size;
138  }
139  }
140 
141  // Add a callback to be called to delete the elements when the array took
142  // their ownership.
144  clear_cb_ = cb;
145  }
146 
147  // Add a callback to be called to compare the elements when needed (contains,
148  // get_id, ...)
150  compare_cb_ = cb;
151  }
152 
153  // Clear the array, calling the clear callback function if any.
154  // All the owned callbacks are also deleted.
155  // If you don't want the callbacks to be deleted, before calling clear, set
156  // the callback to nullptr.
157  void clear();
158 
159  // Delete objects pointed to by data_[i]
160  void delete_data_pointers();
161 
162  // This method clears the current object, then, does a shallow copy of
163  // its argument, and finally invalidates its argument.
164  // Callbacks are moved to the current object;
165  void move(GenericVector<T>* from);
166 
167  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
168  // The callback given must be permanent since they will be called more than
169  // once. The given callback will be deleted at the end.
170  // If the callbacks are nullptr, then the data is simply read/written using
171  // fread (and swapping)/fwrite.
172  // Returns false on error or if the callback returns false.
173  // DEPRECATED. Use [De]Serialize[Classes] instead.
174  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const&>* cb) const;
175  bool read(tesseract::TFile* f,
177  // Writes a vector of simple types to the given file. Assumes that bitwise
178  // read/write of T will work. Returns false in case of error.
179  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
180  bool Serialize(FILE* fp) const;
181  bool Serialize(tesseract::TFile* fp) const;
182  // Reads a vector of simple types from the given file. Assumes that bitwise
183  // read/write will work with ReverseN according to sizeof(T).
184  // Returns false in case of error.
185  // If swap is true, assumes a big/little-endian swap is needed.
186  // TFile is assumed to know about swapping.
187  bool DeSerialize(bool swap, FILE* fp);
188  bool DeSerialize(tesseract::TFile* fp);
189  // Skips the deserialization of the vector.
190  static bool SkipDeSerialize(tesseract::TFile* fp);
191  // Writes a vector of classes to the given file. Assumes the existence of
192  // bool T::Serialize(FILE* fp) const that returns false in case of error.
193  // Returns false in case of error.
194  bool SerializeClasses(FILE* fp) const;
195  bool SerializeClasses(tesseract::TFile* fp) const;
196  // Reads a vector of classes from the given file. Assumes the existence of
197  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
198  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
199  // this function. Returns false in case of error.
200  // If swap is true, assumes a big/little-endian swap is needed.
201  bool DeSerializeClasses(bool swap, FILE* fp);
203  // Calls SkipDeSerialize on the elements of the vector.
204  static bool SkipDeSerializeClasses(tesseract::TFile* fp);
205 
206  // Allocates a new array of double the current_size, copies over the
207  // information from data to the new location, deletes data and returns
208  // the pointed to the new larger array.
209  // This function uses memcpy to copy the data, instead of invoking
210  // operator=() for each element like double_the_size() does.
211  static T* double_the_size_memcpy(int current_size, T* data) {
212  T* data_new = new T[current_size * 2];
213  memcpy(data_new, data, sizeof(T) * current_size);
214  delete[] data;
215  return data_new;
216  }
217 
218  // Reverses the elements of the vector.
219  void reverse() {
220  for (int i = 0; i < size_used_ / 2; ++i) {
221  Swap(&data_[i], &data_[size_used_ - 1 - i]);
222  }
223  }
224 
225  // Sorts the members of this vector using the less than comparator (cmp_lt),
226  // which compares the values. Useful for GenericVectors to primitive types.
227  // Will not work so great for pointers (unless you just want to sort some
228  // pointers). You need to provide a specialization to sort_cmp to use
229  // your type.
230  void sort();
231 
232  // Sort the array into the order defined by the qsort function comparator.
233  // The comparator function is as defined by qsort, ie. it receives pointers
234  // to two Ts and returns negative if the first element is to appear earlier
235  // in the result and positive if it is to appear later, with 0 for equal.
236  void sort(int (*comparator)(const void*, const void*)) {
237  qsort(data_, size_used_, sizeof(*data_), comparator);
238  }
239 
240  // Searches the array (assuming sorted in ascending order, using sort()) for
241  // an element equal to target and returns true if it is present.
242  // Use binary_search to get the index of target, or its nearest candidate.
243  bool bool_binary_search(const T& target) const {
244  int index = binary_search(target);
245  if (index >= size_used_) {
246  return false;
247  }
248  return data_[index] == target;
249  }
250  // Searches the array (assuming sorted in ascending order, using sort()) for
251  // an element equal to target and returns the index of the best candidate.
252  // The return value is conceptually the largest index i such that
253  // data_[i] <= target or 0 if target < the whole vector.
254  // NOTE that this function uses operator> so really the return value is
255  // the largest index i such that data_[i] > target is false.
256  int binary_search(const T& target) const {
257  int bottom = 0;
258  int top = size_used_;
259  while (top - bottom > 1) {
260  int middle = (bottom + top) / 2;
261  if (data_[middle] > target) {
262  top = middle;
263  } else {
264  bottom = middle;
265  }
266  }
267  return bottom;
268  }
269 
270  // Compact the vector by deleting elements using operator!= on basic types.
271  // The vector must be sorted.
272  void compact_sorted() {
273  if (size_used_ == 0) {
274  return;
275  }
276 
277  // First element is in no matter what, hence the i = 1.
278  int last_write = 0;
279  for (int i = 1; i < size_used_; ++i) {
280  // Finds next unique item and writes it.
281  if (data_[last_write] != data_[i]) {
282  data_[++last_write] = data_[i];
283  }
284  }
285  // last_write is the index of a valid data cell, so add 1.
286  size_used_ = last_write + 1;
287  }
288 
289  // Compact the vector by deleting elements for which delete_cb returns
290  // true. delete_cb is a permanent callback and will be deleted.
292  int new_size = 0;
293  int old_index = 0;
294  // Until the callback returns true, the elements stay the same.
295  while (old_index < size_used_ && !delete_cb->Run(old_index++)) {
296  ++new_size;
297  }
298  // Now just copy anything else that gets false from delete_cb.
299  for (; old_index < size_used_; ++old_index) {
300  if (!delete_cb->Run(old_index)) {
301  data_[new_size++] = data_[old_index];
302  }
303  }
304  size_used_ = new_size;
305  delete delete_cb;
306  }
307 
308  T dot_product(const GenericVector<T>& other) const {
309  T result = static_cast<T>(0);
310  for (int i = std::min(size_used_, other.size_used_) - 1; i >= 0; --i) {
311  result += data_[i] * other.data_[i];
312  }
313  return result;
314  }
315 
316  // Returns the index of what would be the target_index_th item in the array
317  // if the members were sorted, without actually sorting. Members are
318  // shuffled around, but it takes O(n) time.
319  // NOTE: uses operator< and operator== on the members.
320  int choose_nth_item(int target_index) {
321  // Make sure target_index is legal.
322  if (target_index < 0) {
323  target_index = 0; // ensure legal
324  } else if (target_index >= size_used_) {
325  target_index = size_used_ - 1;
326  }
327  unsigned int seed = 1;
328  return choose_nth_item(target_index, 0, size_used_, &seed);
329  }
330 
331  // Swaps the elements with the given indices.
332  void swap(int index1, int index2) {
333  if (index1 != index2) {
334  T tmp = data_[index1];
335  data_[index1] = data_[index2];
336  data_[index2] = tmp;
337  }
338  }
339  // Returns true if all elements of *this are within the given range.
340  // Only uses operator<
341  bool WithinBounds(const T& rangemin, const T& rangemax) const {
342  for (int i = 0; i < size_used_; ++i) {
343  if (data_[i] < rangemin || rangemax < data_[i]) {
344  return false;
345  }
346  }
347  return true;
348  }
349 
350  protected:
351  // Internal recursive version of choose_nth_item.
352  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
353 
354  // Init the object, allocating size memory.
355  void init(int size);
356 
357  // We are assuming that the object generally placed in the
358  // vector are small enough that for efficiency it makes sense
359  // to start with a larger initial size.
360  static const int kDefaultVectorSize = 4;
361  int32_t size_used_{};
362  int32_t size_reserved_{};
363  T* data_;
365  // Mutable because Run method is not const
367 };
368 
369 namespace tesseract {
370 
371 // The default FileReader loads the whole file into the vector of char,
372 // returning false on error.
373 inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
374  bool result = false;
375  FILE* fp = fopen(filename, "rb");
376  if (fp != nullptr) {
377  fseek(fp, 0, SEEK_END);
378  long size = ftell(fp);
379  fseek(fp, 0, SEEK_SET);
380  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
381  if (size > 0 && size < LONG_MAX) {
382  // reserve an extra byte in case caller wants to append a '\0' character
383  data->reserve(size + 1);
384  data->resize_no_init(size);
385  result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
386  }
387  fclose(fp);
388  }
389  return result;
390 }
391 
392 inline bool LoadDataFromFile(const STRING& filename,
393  GenericVector<char>* data) {
394  return LoadDataFromFile(filename.string(), data);
395 }
396 
397 // The default FileWriter writes the vector of char to the filename file,
398 // returning false on error.
399 inline bool SaveDataToFile(const GenericVector<char>& data,
400  const STRING& filename) {
401  FILE* fp = fopen(filename.string(), "wb");
402  if (fp == nullptr) {
403  return false;
404  }
405  bool result =
406  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
407  fclose(fp);
408  return result;
409 }
410 // Reads a file as a vector of STRING.
411 inline bool LoadFileLinesToStrings(const STRING& filename,
412  GenericVector<STRING>* lines) {
413  GenericVector<char> data;
414  if (!LoadDataFromFile(filename.string(), &data)) {
415  return false;
416  }
417  STRING lines_str(&data[0], data.size());
418  lines_str.split('\n', lines);
419  return true;
420 }
421 
422 template <typename T>
423 bool cmp_eq(T const& t1, T const& t2) {
424  return t1 == t2;
425 }
426 
427 // Used by sort()
428 // return < 0 if t1 < t2
429 // return 0 if t1 == t2
430 // return > 0 if t1 > t2
431 template <typename T>
432 int sort_cmp(const void* t1, const void* t2) {
433  const T* a = static_cast<const T*>(t1);
434  const T* b = static_cast<const T*>(t2);
435  if (*a < *b) {
436  return -1;
437  }
438  if (*b < *a) {
439  return 1;
440  }
441  return 0;
442 }
443 
444 // Used by PointerVector::sort()
445 // return < 0 if t1 < t2
446 // return 0 if t1 == t2
447 // return > 0 if t1 > t2
448 template <typename T>
449 int sort_ptr_cmp(const void* t1, const void* t2) {
450  const T* a = *static_cast<T* const*>(t1);
451  const T* b = *static_cast<T* const*>(t2);
452  if (*a < *b) {
453  return -1;
454  }
455  if (*b < *a) {
456  return 1;
457  }
458  return 0;
459 }
460 
461 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
462 // as it provides automatic deletion and correct serialization, with the
463 // corollary that all copy operations are deep copies of the pointed-to objects.
464 template <typename T>
465 class PointerVector : public GenericVector<T*> {
466  public:
468  explicit PointerVector(int size) : GenericVector<T*>(size) {}
470  // Clear must be called here, even though it is called again by the base,
471  // as the base will call the wrong clear.
472  clear();
473  }
474  // Copy must be deep, as the pointers will be automatically deleted on
475  // destruction.
476  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
477  this->init(other.size());
478  this->operator+=(other);
479  }
481  this->reserve(this->size_used_ + other.size_used_);
482  for (int i = 0; i < other.size(); ++i) {
483  this->push_back(new T(*other.data_[i]));
484  }
485  return *this;
486  }
487 
489  if (&other != this) {
490  this->truncate(0);
491  this->operator+=(other);
492  }
493  return *this;
494  }
495 
496  // Removes an element at the given index and
497  // shifts the remaining elements to the left.
498  void remove(int index) {
499  delete GenericVector<T*>::data_[index];
501  }
502 
503  // Truncates the array to the given size by removing the end.
504  // If the current size is less, the array is not expanded.
505  void truncate(int size) {
506  for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
507  delete GenericVector<T*>::data_[i];
508  }
510  }
511 
512  // Compact the vector by deleting elements for which delete_cb returns
513  // true. delete_cb is a permanent callback and will be deleted.
515  int new_size = 0;
516  int old_index = 0;
517  // Until the callback returns true, the elements stay the same.
518  while (old_index < GenericVector<T*>::size_used_ &&
519  !delete_cb->Run(GenericVector<T*>::data_[old_index++])) {
520  ++new_size;
521  }
522  // Now just copy anything else that gets false from delete_cb.
523  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
524  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
525  GenericVector<T*>::data_[new_size++] =
526  GenericVector<T*>::data_[old_index];
527  } else {
528  delete GenericVector<T*>::data_[old_index];
529  }
530  }
532  delete delete_cb;
533  }
534 
535  // Clear the array, calling the clear callback function if any.
536  // All the owned callbacks are also deleted.
537  // If you don't want the callbacks to be deleted, before calling clear, set
538  // the callback to nullptr.
539  void clear() {
542  }
543 
544  // Writes a vector of (pointers to) classes to the given file. Assumes the
545  // existence of bool T::Serialize(FILE*) const that returns false in case of
546  // error. There is no Serialize for simple types, as you would have a
547  // normal GenericVector of those.
548  // Returns false in case of error.
549  bool Serialize(FILE* fp) const {
550  int32_t used = GenericVector<T*>::size_used_;
551  if (fwrite(&used, sizeof(used), 1, fp) != 1) {
552  return false;
553  }
554  for (int i = 0; i < used; ++i) {
555  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
556  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
557  return false;
558  }
559  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
560  return false;
561  }
562  }
563  return true;
564  }
565  bool Serialize(TFile* fp) const {
566  int32_t used = GenericVector<T*>::size_used_;
567  if (fp->FWrite(&used, sizeof(used), 1) != 1) {
568  return false;
569  }
570  for (int i = 0; i < used; ++i) {
571  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
572  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
573  return false;
574  }
575  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
576  return false;
577  }
578  }
579  return true;
580  }
581  // Reads a vector of (pointers to) classes to the given file. Assumes the
582  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
583  // case of error. There is no Serialize for simple types, as you would have a
584  // normal GenericVector of those.
585  // If swap is true, assumes a big/little-endian swap is needed.
586  // Also needs T::T(), as new T is used in this function.
587  // Returns false in case of error.
588  bool DeSerialize(bool swap, FILE* fp) {
589  uint32_t reserved;
590  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
591  return false;
592  }
593  if (swap) {
594  Reverse32(&reserved);
595  }
596  // Arbitrarily limit the number of elements to protect against bad data.
597  assert(reserved <= UINT16_MAX);
598  if (reserved > UINT16_MAX) {
599  return false;
600  }
601  GenericVector<T*>::reserve(reserved);
602  truncate(0);
603  for (uint32_t i = 0; i < reserved; ++i) {
604  int8_t non_null;
605  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) {
606  return false;
607  }
608  T* item = nullptr;
609  if (non_null != 0) {
610  item = new T;
611  if (!item->DeSerialize(swap, fp)) {
612  delete item;
613  return false;
614  }
615  this->push_back(item);
616  } else {
617  // Null elements should keep their place in the vector.
618  this->push_back(nullptr);
619  }
620  }
621  return true;
622  }
623  bool DeSerialize(TFile* fp) {
624  int32_t reserved;
625  if (!DeSerializeSize(fp, &reserved)) {
626  return false;
627  }
628  GenericVector<T*>::reserve(reserved);
629  truncate(0);
630  for (int i = 0; i < reserved; ++i) {
631  if (!DeSerializeElement(fp)) {
632  return false;
633  }
634  }
635  return true;
636  }
637  // Enables deserialization of a selection of elements. Note that in order to
638  // retain the integrity of the stream, the caller must call some combination
639  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
640  // *size, assuming a true return.
641  static bool DeSerializeSize(TFile* fp, int32_t* size) {
642  return fp->FReadEndian(size, sizeof(*size), 1) == 1;
643  }
644  // Reads and appends to the vector the next element of the serialization.
646  int8_t non_null;
647  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
648  return false;
649  }
650  T* item = nullptr;
651  if (non_null != 0) {
652  item = new T;
653  if (!item->DeSerialize(fp)) {
654  delete item;
655  return false;
656  }
657  this->push_back(item);
658  } else {
659  // Null elements should keep their place in the vector.
660  this->push_back(nullptr);
661  }
662  return true;
663  }
664  // Skips the next element of the serialization.
665  static bool DeSerializeSkip(TFile* fp) {
666  int8_t non_null;
667  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
668  return false;
669  }
670  if (non_null != 0) {
671  if (!T::SkipDeSerialize(fp)) {
672  return false;
673  }
674  }
675  return true;
676  }
677 
678  // Sorts the items pointed to by the members of this vector using
679  // t::operator<().
680  void sort() {
681  this->GenericVector<T*>::sort(&sort_ptr_cmp<T>);
682  }
683 };
684 
685 } // namespace tesseract
686 
687 // A useful vector that uses operator== to do comparisons.
688 template <typename T>
689 class GenericVectorEqEq : public GenericVector<T> {
690  public:
693  NewPermanentTessCallback(tesseract::cmp_eq<T>));
694  }
695  explicit GenericVectorEqEq(int size) : GenericVector<T>(size) {
697  NewPermanentTessCallback(tesseract::cmp_eq<T>));
698  }
699 };
700 
701 template <typename T>
702 void GenericVector<T>::init(int size) {
703  size_used_ = 0;
704  if (size <= 0) {
705  data_ = nullptr;
706  size_reserved_ = 0;
707  } else {
708  if (size < kDefaultVectorSize) {
709  size = kDefaultVectorSize;
710  }
711  data_ = new T[size];
713  }
714  clear_cb_ = nullptr;
715  compare_cb_ = nullptr;
716 }
717 
718 template <typename T>
720  clear();
721 }
722 
723 // Reserve some memory. If the internal array contains elements, they are
724 // copied.
725 template <typename T>
727  if (size_reserved_ >= size || size <= 0) {
728  return;
729  }
730  if (size < kDefaultVectorSize) {
731  size = kDefaultVectorSize;
732  }
733  T* new_array = new T[size];
734  for (int i = 0; i < size_used_; ++i) {
735  new_array[i] = data_[i];
736  }
737  delete[] data_;
738  data_ = new_array;
740 }
741 
742 template <typename T>
744  if (size_reserved_ == 0) {
746  } else {
747  reserve(2 * size_reserved_);
748  }
749 }
750 
751 // Resizes to size and sets all values to t.
752 template <typename T>
753 void GenericVector<T>::init_to_size(int size, const T& t) {
754  reserve(size);
755  size_used_ = size;
756  for (int i = 0; i < size; ++i) {
757  data_[i] = t;
758  }
759 }
760 
761 // Return the object from an index.
762 template <typename T>
763 T& GenericVector<T>::get(int index) const {
764  assert(index >= 0 && index < size_used_);
765  return data_[index];
766 }
767 
768 template <typename T>
769 T& GenericVector<T>::operator[](int index) const {
770  assert(index >= 0 && index < size_used_);
771  return data_[index];
772 }
773 
774 template <typename T>
776  assert(size_used_ > 0);
777  return data_[size_used_ - 1];
778 }
779 // Returns the last object and removes it.
780 template <typename T>
782  assert(size_used_ > 0);
783  return data_[--size_used_];
784 }
785 
786 // Return the object from an index.
787 template <typename T>
788 void GenericVector<T>::set(const T& t, int index) {
789  assert(index >= 0 && index < size_used_);
790  data_[index] = t;
791 }
792 
793 // Shifts the rest of the elements to the right to make
794 // space for the new elements and inserts the given element
795 // at the specified index.
796 template <typename T>
797 void GenericVector<T>::insert(const T& t, int index) {
798  assert(index >= 0 && index <= size_used_);
799  if (size_reserved_ == size_used_) {
800  double_the_size();
801  }
802  for (int i = size_used_; i > index; --i) {
803  data_[i] = data_[i - 1];
804  }
805  data_[index] = t;
806  size_used_++;
807 }
808 
809 // Removes an element at the given index and
810 // shifts the remaining elements to the left.
811 template <typename T>
812 void GenericVector<T>::remove(int index) {
813  assert(index >= 0 && index < size_used_);
814  for (int i = index; i < size_used_ - 1; ++i) {
815  data_[i] = data_[i + 1];
816  }
817  size_used_--;
818 }
819 
820 // Return true if the index is valindex
821 template <typename T>
823  return index >= 0 && index < size_used_;
824 }
825 
826 // Return the index of the T object.
827 template <typename T>
828 int GenericVector<T>::get_index(const T& object) const {
829  for (int i = 0; i < size_used_; ++i) {
830  assert(compare_cb_ != nullptr);
831  if (compare_cb_->Run(object, data_[i])) {
832  return i;
833  }
834  }
835  return -1;
836 }
837 
838 // Return true if T is in the array
839 template <typename T>
840 bool GenericVector<T>::contains(const T& object) const {
841  return get_index(object) != -1;
842 }
843 
844 // Add an element in the array
845 template <typename T>
847  int index = 0;
848  if (size_used_ == size_reserved_) {
849  double_the_size();
850  }
851  index = size_used_++;
852  data_[index] = object;
853  return index;
854 }
855 
856 template <typename T>
857 int GenericVector<T>::push_back_new(const T& object) {
858  int index = get_index(object);
859  if (index >= 0) {
860  return index;
861  }
862  return push_back(object);
863 }
864 
865 // Add an element in the array (front)
866 template <typename T>
867 int GenericVector<T>::push_front(const T& object) {
868  if (size_used_ == size_reserved_) {
869  double_the_size();
870  }
871  for (int i = size_used_; i > 0; --i) {
872  data_[i] = data_[i - 1];
873  }
874  data_[0] = object;
875  ++size_used_;
876  return 0;
877 }
878 
879 template <typename T>
881  push_back(t);
882 }
883 
884 template <typename T>
886  this->reserve(size_used_ + other.size_used_);
887  for (int i = 0; i < other.size(); ++i) {
888  this->operator+=(other.data_[i]);
889  }
890  return *this;
891 }
892 
893 template <typename T>
895  if (&other != this) {
896  this->truncate(0);
897  this->operator+=(other);
898  }
899  return *this;
900 }
901 
902 // Clear the array, calling the callback function if any.
903 template <typename T>
905  if (size_reserved_ > 0 && clear_cb_ != nullptr) {
906  for (int i = 0; i < size_used_; ++i) {
907  clear_cb_->Run(data_[i]);
908  }
909  }
910  delete[] data_;
911  data_ = nullptr;
912  size_used_ = 0;
913  size_reserved_ = 0;
914  delete clear_cb_;
915  clear_cb_ = nullptr;
916  delete compare_cb_;
917  compare_cb_ = nullptr;
918 }
919 
920 template <typename T>
922  for (int i = 0; i < size_used_; ++i) {
923  delete data_[i];
924  }
925 }
926 
927 template <typename T>
929  FILE* f, TessResultCallback2<bool, FILE*, T const&>* cb) const {
930  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
931  return false;
932  }
933  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
934  return false;
935  }
936  if (cb != nullptr) {
937  for (int i = 0; i < size_used_; ++i) {
938  if (!cb->Run(f, data_[i])) {
939  delete cb;
940  return false;
941  }
942  }
943  delete cb;
944  } else {
945  if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
946  return false;
947  }
948  }
949  return true;
950 }
951 
952 template <typename T>
955  int32_t reserved;
956  if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
957  return false;
958  }
959  reserve(reserved);
960  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
961  return false;
962  }
963  if (cb != nullptr) {
964  for (int i = 0; i < size_used_; ++i) {
965  if (!cb->Run(f, data_ + i)) {
966  delete cb;
967  return false;
968  }
969  }
970  delete cb;
971  } else {
972  if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
973  return false;
974  }
975  }
976  return true;
977 }
978 
979 // Writes a vector of simple types to the given file. Assumes that bitwise
980 // read/write of T will work. Returns false in case of error.
981 template <typename T>
982 bool GenericVector<T>::Serialize(FILE* fp) const {
983  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
984  return false;
985  }
986  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
987  return false;
988  }
989  return true;
990 }
991 template <typename T>
993  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
994  return false;
995  }
996  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
997  return false;
998  }
999  return true;
1000 }
1001 
1002 // Reads a vector of simple types from the given file. Assumes that bitwise
1003 // read/write will work with ReverseN according to sizeof(T).
1004 // Returns false in case of error.
1005 // If swap is true, assumes a big/little-endian swap is needed.
1006 template <typename T>
1007 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
1008  uint32_t reserved;
1009  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1010  return false;
1011  }
1012  if (swap) {
1013  Reverse32(&reserved);
1014  }
1015  // Arbitrarily limit the number of elements to protect against bad data.
1016  assert(reserved <= UINT16_MAX);
1017  if (reserved > UINT16_MAX) {
1018  return false;
1019  }
1020  reserve(reserved);
1021  size_used_ = reserved;
1022  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
1023  return false;
1024  }
1025  if (swap) {
1026  for (int i = 0; i < size_used_; ++i) {
1027  ReverseN(&data_[i], sizeof(data_[i]));
1028  }
1029  }
1030  return true;
1031 }
1032 template <typename T>
1034  uint32_t reserved;
1035  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1036  return false;
1037  }
1038  // Arbitrarily limit the number of elements to protect against bad data.
1039  const uint32_t limit = 50000000;
1040  assert(reserved <= limit);
1041  if (reserved > limit) {
1042  return false;
1043  }
1044  reserve(reserved);
1045  size_used_ = reserved;
1046  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
1047 }
1048 template <typename T>
1050  uint32_t reserved;
1051  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1052  return false;
1053  }
1054  return (uint32_t)fp->FRead(nullptr, sizeof(T), reserved) == reserved;
1055 }
1056 
1057 // Writes a vector of classes to the given file. Assumes the existence of
1058 // bool T::Serialize(FILE* fp) const that returns false in case of error.
1059 // Returns false in case of error.
1060 template <typename T>
1062  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
1063  return false;
1064  }
1065  for (int i = 0; i < size_used_; ++i) {
1066  if (!data_[i].Serialize(fp)) {
1067  return false;
1068  }
1069  }
1070  return true;
1071 }
1072 template <typename T>
1074  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
1075  return false;
1076  }
1077  for (int i = 0; i < size_used_; ++i) {
1078  if (!data_[i].Serialize(fp)) {
1079  return false;
1080  }
1081  }
1082  return true;
1083 }
1084 
1085 // Reads a vector of classes from the given file. Assumes the existence of
1086 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
1087 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
1088 // this function. Returns false in case of error.
1089 // If swap is true, assumes a big/little-endian swap is needed.
1090 template <typename T>
1091 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
1092  int32_t reserved;
1093  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1094  return false;
1095  }
1096  if (swap) {
1097  Reverse32(&reserved);
1098  }
1099  T empty;
1100  init_to_size(reserved, empty);
1101  for (int i = 0; i < reserved; ++i) {
1102  if (!data_[i].DeSerialize(swap, fp)) {
1103  return false;
1104  }
1105  }
1106  return true;
1107 }
1108 template <typename T>
1110  int32_t reserved;
1111  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1112  return false;
1113  }
1114  T empty;
1115  init_to_size(reserved, empty);
1116  for (int i = 0; i < reserved; ++i) {
1117  if (!data_[i].DeSerialize(fp)) {
1118  return false;
1119  }
1120  }
1121  return true;
1122 }
1123 template <typename T>
1125  int32_t reserved;
1126  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1127  return false;
1128  }
1129  for (int i = 0; i < reserved; ++i) {
1130  if (!T::SkipDeSerialize(fp)) {
1131  return false;
1132  }
1133  }
1134  return true;
1135 }
1136 
1137 // This method clear the current object, then, does a shallow copy of
1138 // its argument, and finally invalidates its argument.
1139 template <typename T>
1141  this->clear();
1142  this->data_ = from->data_;
1143  this->size_reserved_ = from->size_reserved_;
1144  this->size_used_ = from->size_used_;
1145  this->compare_cb_ = from->compare_cb_;
1146  this->clear_cb_ = from->clear_cb_;
1147  from->data_ = nullptr;
1148  from->clear_cb_ = nullptr;
1149  from->compare_cb_ = nullptr;
1150  from->size_used_ = 0;
1151  from->size_reserved_ = 0;
1152 }
1153 
1154 template <typename T>
1156  sort(&tesseract::sort_cmp<T>);
1157 }
1158 
1159 // Internal recursive version of choose_nth_item.
1160 // The algorithm used comes from "Algorithms" by Sedgewick:
1161 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1162 // The principle is to choose a random pivot, and move everything less than
1163 // the pivot to its left, and everything greater than the pivot to the end
1164 // of the array, then recurse on the part that contains the desired index, or
1165 // just return the answer if it is in the equal section in the middle.
1166 // The random pivot guarantees average linear time for the same reason that
1167 // n times vector::push_back takes linear time on average.
1168 // target_index, start and and end are all indices into the full array.
1169 // Seed is a seed for rand_r for thread safety purposes. Its value is
1170 // unimportant as the random numbers do not affect the result except
1171 // between equal answers.
1172 template <typename T>
1173 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1174  unsigned int* seed) {
1175  // Number of elements to process.
1176  int num_elements = end - start;
1177  // Trivial cases.
1178  if (num_elements <= 1) {
1179  return start;
1180  }
1181  if (num_elements == 2) {
1182  if (data_[start] < data_[start + 1]) {
1183  return target_index > start ? start + 1 : start;
1184  }
1185  return target_index > start ? start : start + 1;
1186  }
1187 // Place the pivot at start.
1188 #ifndef rand_r // _MSC_VER, ANDROID
1189  srand(*seed);
1190 # define rand_r(seed) rand()
1191 #endif // _MSC_VER
1192  int pivot = rand_r(seed) % num_elements + start;
1193  swap(pivot, start);
1194  // The invariant condition here is that items [start, next_lesser) are less
1195  // than the pivot (which is at index next_lesser) and items
1196  // [prev_greater, end) are greater than the pivot, with items
1197  // [next_lesser, prev_greater) being equal to the pivot.
1198  int next_lesser = start;
1199  int prev_greater = end;
1200  for (int next_sample = start + 1; next_sample < prev_greater;) {
1201  if (data_[next_sample] < data_[next_lesser]) {
1202  swap(next_lesser++, next_sample++);
1203  } else if (data_[next_sample] == data_[next_lesser]) {
1204  ++next_sample;
1205  } else {
1206  swap(--prev_greater, next_sample);
1207  }
1208  }
1209  // Now the invariant is set up, we recurse on just the section that contains
1210  // the desired index.
1211  if (target_index < next_lesser) {
1212  return choose_nth_item(target_index, start, next_lesser, seed);
1213  }
1214  if (target_index < prev_greater) {
1215  return next_lesser; // In equal bracket.
1216  }
1217  return choose_nth_item(target_index, prev_greater, end, seed);
1218 }
1219 
1220 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
TessCallback1< T > * clear_cb_
bool DeSerialize(bool swap, FILE *fp)
void double_the_size()
void delete_data_pointers()
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:270
void swap(int index1, int index2)
bool LoadFileLinesToStrings(const STRING &filename, GenericVector< STRING > *lines)
static bool SkipDeSerialize(tesseract::TFile *fp)
void compact(TessResultCallback1< bool, const T * > *delete_cb)
void Swap(T *p1, T *p2)
Definition: helpers.h:95
int length() const
Definition: genericvector.h:84
TessResultCallback2< bool, T const &, T const & > * compare_cb_
bool DeSerialize(TFile *fp)
Definition: strngs.h:45
bool Serialize(FILE *fp) const
PointerVector< T > & operator+=(const PointerVector &other)
T & get(int index) const
bool WithinBounds(const T &rangemin, const T &rangemax) const
int32_t size_used_
void init_to_size(int size, const T &t)
void resize_no_init(int size)
Definition: genericvector.h:64
bool contains(const T &object) const
int choose_nth_item(int target_index)
T contains_index(int index) const
void set(const T &t, int index)
void set_compare_callback(TessResultCallback2< bool, T const &, T const & > *cb)
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const & > *cb) const
bool Serialize(TFile *fp) const
int32_t size_reserved_
GenericVector(int size, const T &init_val)
Definition: genericvector.h:41
int push_back_new(const T &object)
GenericVector< T > & operator=(const GenericVector &other)
void compact(TessResultCallback1< bool, int > *delete_cb)
void remove(int index)
bool SerializeClasses(FILE *fp) const
int sort_cmp(const void *t1, const void *t2)
GenericVector(const GenericVector &other)
Definition: genericvector.h:47
bool read(tesseract::TFile *f, TessResultCallback2< bool, tesseract::TFile *, T * > *cb)
PointerVector< T > & operator=(const PointerVector &other)
T dot_product(const GenericVector< T > &other) const
void reserve(int size)
bool DeSerializeClasses(bool swap, FILE *fp)
int get_index(const T &object) const
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
int FWrite(const void *buffer, size_t size, int count)
Definition: serialis.cpp:318
static bool DeSerializeSize(TFile *fp, int32_t *size)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
bool LoadDataFromFile(const STRING &filename, GenericVector< char > *data)
void set_clear_callback(TessCallback1< T > *cb)
const char * string() const
Definition: strngs.cpp:194
void truncate(int size)
bool empty() const
Definition: genericvector.h:89
int push_back(T object)
void insert(const T &t, int index)
bool DeSerialize(bool swap, FILE *fp)
virtual void Run(A1)=0
bool bool_binary_search(const T &target) const
virtual R Run(A1)=0
int sort_ptr_cmp(const void *t1, const void *t2)
size_t unsigned_size() const
Definition: genericvector.h:74
T & operator[](int index) const
int size_reserved() const
Definition: genericvector.h:80
static const int kDefaultVectorSize
bool DeSerializeElement(TFile *fp)
void move(GenericVector< T > *from)
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
int binary_search(const T &target) const
#define rand_r(seed)
bool cmp_eq(T const &t1, T const &t2)
static T * double_the_size_memcpy(int current_size, T *data)
void init(int size)
void split(char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:282
void compact_sorted()
void sort(int(*comparator)(const void *, const void *))
int push_front(const T &object)
int size() const
Definition: genericvector.h:70
PointerVector(const PointerVector &other)
virtual R Run(A1, A2)=0
GenericVectorEqEq(int size)
bool Serialize(FILE *fp) const
GenericVector< T > & operator+=(const GenericVector &other)
static bool DeSerializeSkip(TFile *fp)
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:185
void Reverse32(void *ptr)
Definition: helpers.h:202
T & back() const
int FReadEndian(void *buffer, size_t size, int count)
Definition: serialis.cpp:259