tesseract 3.04.01

ccutil/genericvector.h

Go to the documentation of this file.
00001 
00002 // File:        genericvector.h
00003 // Description: Generic vector class
00004 // Author:      Daria Antonova
00005 // Created:     Mon Jun 23 11:26:43 PDT 2008
00006 //
00007 // (C) Copyright 2007, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 //
00020 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
00021 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
00022 
00023 #include <assert.h>
00024 #include <stdio.h>
00025 #include <stdlib.h>
00026 
00027 #include "tesscallback.h"
00028 #include "errcode.h"
00029 #include "helpers.h"
00030 #include "ndminx.h"
00031 #include "serialis.h"
00032 #include "strngs.h"
00033 
00034 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
00035 // provides automatic deletion of pointers, [De]Serialize that works, and
00036 // sort that works.
00037 template <typename T>
00038 class GenericVector {
00039  public:
00040   GenericVector() {
00041     init(kDefaultVectorSize);
00042   }
00043   GenericVector(int size, T init_val) {
00044     init(size);
00045     init_to_size(size, init_val);
00046   }
00047 
00048   // Copy
00049   GenericVector(const GenericVector& other) {
00050     this->init(other.size());
00051     this->operator+=(other);
00052   }
00053   GenericVector<T> &operator+=(const GenericVector& other);
00054   GenericVector<T> &operator=(const GenericVector& other);
00055 
00056   ~GenericVector();
00057 
00058   // Reserve some memory.
00059   void reserve(int size);
00060   // Double the size of the internal array.
00061   void double_the_size();
00062 
00063   // Resizes to size and sets all values to t.
00064   void init_to_size(int size, T t);
00065   // Resizes to size without any initialization.
00066   void resize_no_init(int size) {
00067     reserve(size);
00068     size_used_ = size;
00069   }
00070 
00071   // Return the size used.
00072   int size() const {
00073     return size_used_;
00074   }
00075   int size_reserved() const {
00076     return size_reserved_;
00077   }
00078 
00079   int length() const {
00080     return size_used_;
00081   }
00082 
00083   // Return true if empty.
00084   bool empty() const {
00085     return size_used_ == 0;
00086   }
00087 
00088   // Return the object from an index.
00089   T &get(int index) const;
00090   T &back() const;
00091   T &operator[](int index) const;
00092   // Returns the last object and removes it.
00093   T pop_back();
00094 
00095   // Return the index of the T object.
00096   // This method NEEDS a compare_callback to be passed to
00097   // set_compare_callback.
00098   int get_index(T object) const;
00099 
00100   // Return true if T is in the array
00101   bool contains(T object) const;
00102 
00103   // Return true if the index is valid
00104   T contains_index(int index) const;
00105 
00106   // Push an element in the end of the array
00107   int push_back(T object);
00108   void operator+=(T t);
00109 
00110   // Push an element in the end of the array if the same
00111   // element is not already contained in the array.
00112   int push_back_new(T object);
00113 
00114   // Push an element in the front of the array
00115   // Note: This function is O(n)
00116   int push_front(T object);
00117 
00118   // Set the value at the given index
00119   void set(T t, int index);
00120 
00121   // Insert t at the given index, push other elements to the right.
00122   void insert(T t, int index);
00123 
00124   // Removes an element at the given index and
00125   // shifts the remaining elements to the left.
00126   void remove(int index);
00127 
00128   // Truncates the array to the given size by removing the end.
00129   // If the current size is less, the array is not expanded.
00130   void truncate(int size) {
00131     if (size < size_used_)
00132       size_used_ = size;
00133   }
00134 
00135   // Add a callback to be called to delete the elements when the array took
00136   // their ownership.
00137   void set_clear_callback(TessCallback1<T>* cb);
00138 
00139   // Add a callback to be called to compare the elements when needed (contains,
00140   // get_id, ...)
00141   void set_compare_callback(TessResultCallback2<bool, T const &, T const &>* cb);
00142 
00143   // Clear the array, calling the clear callback function if any.
00144   // All the owned callbacks are also deleted.
00145   // If you don't want the callbacks to be deleted, before calling clear, set
00146   // the callback to NULL.
00147   void clear();
00148 
00149   // Delete objects pointed to by data_[i]
00150   void delete_data_pointers();
00151 
00152   // This method clears the current object, then, does a shallow copy of
00153   // its argument, and finally invalidates its argument.
00154   // Callbacks are moved to the current object;
00155   void move(GenericVector<T>* from);
00156 
00157   // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
00158   // The callback given must be permanent since they will be called more than
00159   // once. The given callback will be deleted at the end.
00160   // If the callbacks are NULL, then the data is simply read/written using
00161   // fread (and swapping)/fwrite.
00162   // Returns false on error or if the callback returns false.
00163   // DEPRECATED. Use [De]Serialize[Classes] instead.
00164   bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
00165   bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
00166   // Writes a vector of simple types to the given file. Assumes that bitwise
00167   // read/write of T will work. Returns false in case of error.
00168   // TODO(rays) Change all callers to use TFile and remove deprecated methods.
00169   bool Serialize(FILE* fp) const;
00170   bool Serialize(tesseract::TFile* fp) const;
00171   // Reads a vector of simple types from the given file. Assumes that bitwise
00172   // read/write will work with ReverseN according to sizeof(T).
00173   // Returns false in case of error.
00174   // If swap is true, assumes a big/little-endian swap is needed.
00175   bool DeSerialize(bool swap, FILE* fp);
00176   bool DeSerialize(bool swap, tesseract::TFile* fp);
00177   // Writes a vector of classes to the given file. Assumes the existence of
00178   // bool T::Serialize(FILE* fp) const that returns false in case of error.
00179   // Returns false in case of error.
00180   bool SerializeClasses(FILE* fp) const;
00181   bool SerializeClasses(tesseract::TFile* fp) const;
00182   // Reads a vector of classes from the given file. Assumes the existence of
00183   // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
00184   // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
00185   // this function. Returns false in case of error.
00186   // If swap is true, assumes a big/little-endian swap is needed.
00187   bool DeSerializeClasses(bool swap, FILE* fp);
00188   bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
00189 
00190   // Allocates a new array of double the current_size, copies over the
00191   // information from data to the new location, deletes data and returns
00192   // the pointed to the new larger array.
00193   // This function uses memcpy to copy the data, instead of invoking
00194   // operator=() for each element like double_the_size() does.
00195   static T *double_the_size_memcpy(int current_size, T *data) {
00196     T *data_new = new T[current_size * 2];
00197     memcpy(data_new, data, sizeof(T) * current_size);
00198     delete[] data;
00199     return data_new;
00200   }
00201 
00202   // Reverses the elements of the vector.
00203   void reverse() {
00204     for (int i = 0; i < size_used_ / 2; ++i)
00205       Swap(&data_[i], &data_[size_used_ - 1 - i]);
00206   }
00207 
00208   // Sorts the members of this vector using the less than comparator (cmp_lt),
00209   // which compares the values. Useful for GenericVectors to primitive types.
00210   // Will not work so great for pointers (unless you just want to sort some
00211   // pointers). You need to provide a specialization to sort_cmp to use
00212   // your type.
00213   void sort();
00214 
00215   // Sort the array into the order defined by the qsort function comparator.
00216   // The comparator function is as defined by qsort, ie. it receives pointers
00217   // to two Ts and returns negative if the first element is to appear earlier
00218   // in the result and positive if it is to appear later, with 0 for equal.
00219   void sort(int (*comparator)(const void*, const void*)) {
00220     qsort(data_, size_used_, sizeof(*data_), comparator);
00221   }
00222 
00223   // Searches the array (assuming sorted in ascending order, using sort()) for
00224   // an element equal to target and returns true if it is present.
00225   // Use binary_search to get the index of target, or its nearest candidate.
00226   bool bool_binary_search(const T& target) const {
00227     int index = binary_search(target);
00228     if (index >= size_used_)
00229       return false;
00230     return data_[index] == target;
00231   }
00232   // Searches the array (assuming sorted in ascending order, using sort()) for
00233   // an element equal to target and returns the index of the best candidate.
00234   // The return value is conceptually the largest index i such that
00235   // data_[i] <= target or 0 if target < the whole vector.
00236   // NOTE that this function uses operator> so really the return value is
00237   // the largest index i such that data_[i] > target is false.
00238   int binary_search(const T& target) const {
00239     int bottom = 0;
00240     int top = size_used_;
00241     do {
00242       int middle = (bottom + top) / 2;
00243       if (data_[middle] > target)
00244         top = middle;
00245       else
00246         bottom = middle;
00247     }
00248     while (top - bottom > 1);
00249     return bottom;
00250   }
00251 
00252   // Compact the vector by deleting elements using operator!= on basic types.
00253   // The vector must be sorted.
00254   void compact_sorted() {
00255     if (size_used_ == 0)
00256       return;
00257 
00258     // First element is in no matter what, hence the i = 1.
00259     int last_write = 0;
00260     for (int i = 1; i < size_used_; ++i) {
00261       // Finds next unique item and writes it.
00262       if (data_[last_write] != data_[i])
00263         data_[++last_write] = data_[i];
00264     }
00265     // last_write is the index of a valid data cell, so add 1.
00266     size_used_ = last_write + 1;
00267   }
00268 
00269   // Compact the vector by deleting elements for which delete_cb returns
00270   // true. delete_cb is a permanent callback and will be deleted.
00271   void compact(TessResultCallback1<bool, int>* delete_cb) {
00272     int new_size = 0;
00273     int old_index = 0;
00274     // Until the callback returns true, the elements stay the same.
00275     while (old_index < size_used_ && !delete_cb->Run(old_index++))
00276       ++new_size;
00277     // Now just copy anything else that gets false from delete_cb.
00278     for (; old_index < size_used_; ++old_index) {
00279       if (!delete_cb->Run(old_index)) {
00280         data_[new_size++] = data_[old_index];
00281       }
00282     }
00283     size_used_ = new_size;
00284     delete delete_cb;
00285   }
00286 
00287   T dot_product(const GenericVector<T>& other) const {
00288     T result = static_cast<T>(0);
00289     for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i)
00290       result += data_[i] * other.data_[i];
00291     return result;
00292   }
00293 
00294   // Returns the index of what would be the target_index_th item in the array
00295   // if the members were sorted, without actually sorting. Members are
00296   // shuffled around, but it takes O(n) time.
00297   // NOTE: uses operator< and operator== on the members.
00298   int choose_nth_item(int target_index) {
00299     // Make sure target_index is legal.
00300     if (target_index < 0)
00301       target_index = 0;                   // ensure legal
00302     else if (target_index >= size_used_)
00303       target_index = size_used_ - 1;
00304     unsigned int seed = 1;
00305     return choose_nth_item(target_index, 0, size_used_, &seed);
00306   }
00307 
00308   // Swaps the elements with the given indices.
00309   void swap(int index1, int index2) {
00310     if (index1 != index2) {
00311       T tmp = data_[index1];
00312       data_[index1] = data_[index2];
00313       data_[index2] = tmp;
00314     }
00315   }
00316   // Returns true if all elements of *this are within the given range.
00317   // Only uses operator<
00318   bool WithinBounds(const T& rangemin, const T& rangemax) const {
00319     for (int i = 0; i < size_used_; ++i) {
00320       if (data_[i] < rangemin || rangemax < data_[i])
00321         return false;
00322     }
00323     return true;
00324   }
00325 
00326  protected:
00327   // Internal recursive version of choose_nth_item.
00328   int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
00329 
00330   // Init the object, allocating size memory.
00331   void init(int size);
00332 
00333   // We are assuming that the object generally placed in thie
00334   // vector are small enough that for efficiency it makes sense
00335   // to start with a larger initial size.
00336   static const int kDefaultVectorSize = 4;
00337   inT32   size_used_;
00338   inT32   size_reserved_;
00339   T*    data_;
00340   TessCallback1<T>* clear_cb_;
00341   // Mutable because Run method is not const
00342   mutable TessResultCallback2<bool, T const &, T const &>* compare_cb_;
00343 };
00344 
00345 namespace tesseract {
00346 
00347 // Function to read a GenericVector<char> from a whole file.
00348 // Returns false on failure.
00349 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
00350 // Function to write a GenericVector<char> to a whole file.
00351 // Returns false on failure.
00352 typedef bool (*FileWriter)(const GenericVector<char>& data,
00353                            const STRING& filename);
00354 // The default FileReader loads the whole file into the vector of char,
00355 // returning false on error.
00356 inline bool LoadDataFromFile(const STRING& filename,
00357                              GenericVector<char>* data) {
00358   FILE* fp = fopen(filename.string(), "rb");
00359   if (fp == NULL) return false;
00360   fseek(fp, 0, SEEK_END);
00361   size_t size = ftell(fp);
00362   fseek(fp, 0, SEEK_SET);
00363   // Pad with a 0, just in case we treat the result as a string.
00364   data->init_to_size((int)size + 1, 0);
00365   bool result = fread(&(*data)[0], 1, size, fp) == size;
00366   fclose(fp);
00367   return result;
00368 }
00369 // The default FileWriter writes the vector of char to the filename file,
00370 // returning false on error.
00371 inline bool SaveDataToFile(const GenericVector<char>& data,
00372                            const STRING& filename) {
00373   FILE* fp = fopen(filename.string(), "wb");
00374   if (fp == NULL) return false;
00375   bool result =
00376       static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
00377   fclose(fp);
00378   return result;
00379 }
00380 
00381 template <typename T>
00382 bool cmp_eq(T const & t1, T const & t2) {
00383   return t1 == t2;
00384 }
00385 
00386 // Used by sort()
00387 // return < 0 if t1 < t2
00388 // return 0 if t1 == t2
00389 // return > 0 if t1 > t2
00390 template <typename T>
00391 int sort_cmp(const void* t1, const void* t2) {
00392   const T* a = static_cast<const T *> (t1);
00393   const T* b = static_cast<const T *> (t2);
00394   if (*a < *b) {
00395     return -1;
00396   } else if (*b < *a) {
00397     return 1;
00398   } else {
00399     return 0;
00400   }
00401 }
00402 
00403 // Used by PointerVector::sort()
00404 // return < 0 if t1 < t2
00405 // return 0 if t1 == t2
00406 // return > 0 if t1 > t2
00407 template <typename T>
00408 int sort_ptr_cmp(const void* t1, const void* t2) {
00409   const T* a = *reinterpret_cast<T * const *>(t1);
00410   const T* b = *reinterpret_cast<T * const *>(t2);
00411   if (*a < *b) {
00412     return -1;
00413   } else if (*b < *a) {
00414     return 1;
00415   } else {
00416     return 0;
00417   }
00418 }
00419 
00420 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
00421 // as it provides automatic deletion and correct serialization, with the
00422 // corollary that all copy operations are deep copies of the pointed-to objects.
00423 template<typename T>
00424 class PointerVector : public GenericVector<T*> {
00425  public:
00426   PointerVector() : GenericVector<T*>() { }
00427   explicit PointerVector(int size) : GenericVector<T*>(size) { }
00428   ~PointerVector() {
00429     // Clear must be called here, even though it is called again by the base,
00430     // as the base will call the wrong clear.
00431     clear();
00432   }
00433   // Copy must be deep, as the pointers will be automatically deleted on
00434   // destruction.
00435   PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
00436     this->init(other.size());
00437     this->operator+=(other);
00438   }
00439   PointerVector<T>& operator+=(const PointerVector& other) {
00440     this->reserve(this->size_used_ + other.size_used_);
00441     for (int i = 0; i < other.size(); ++i) {
00442       this->push_back(new T(*other.data_[i]));
00443     }
00444     return *this;
00445   }
00446 
00447   PointerVector<T>& operator=(const PointerVector& other) {
00448     if (&other != this) {
00449       this->truncate(0);
00450       this->operator+=(other);
00451     }
00452     return *this;
00453   }
00454 
00455   // Removes an element at the given index and
00456   // shifts the remaining elements to the left.
00457   void remove(int index) {
00458     delete GenericVector<T*>::data_[index];
00459     GenericVector<T*>::remove(index);
00460   }
00461 
00462   // Truncates the array to the given size by removing the end.
00463   // If the current size is less, the array is not expanded.
00464   void truncate(int size) {
00465     for (int i = size; i < GenericVector<T*>::size_used_; ++i)
00466       delete GenericVector<T*>::data_[i];
00467     GenericVector<T*>::truncate(size);
00468   }
00469 
00470   // Compact the vector by deleting elements for which delete_cb returns
00471   // true. delete_cb is a permanent callback and will be deleted.
00472   void compact(TessResultCallback1<bool, const T*>* delete_cb) {
00473     int new_size = 0;
00474     int old_index = 0;
00475     // Until the callback returns true, the elements stay the same.
00476     while (old_index < GenericVector<T*>::size_used_ &&
00477            !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
00478       ++new_size;
00479     // Now just copy anything else that gets false from delete_cb.
00480     for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
00481       if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
00482         GenericVector<T*>::data_[new_size++] =
00483             GenericVector<T*>::data_[old_index];
00484       } else {
00485         delete GenericVector<T*>::data_[old_index];
00486       }
00487     }
00488     GenericVector<T*>::size_used_ = new_size;
00489     delete delete_cb;
00490   }
00491 
00492   // Clear the array, calling the clear callback function if any.
00493   // All the owned callbacks are also deleted.
00494   // If you don't want the callbacks to be deleted, before calling clear, set
00495   // the callback to NULL.
00496   void clear() {
00497     GenericVector<T*>::delete_data_pointers();
00498     GenericVector<T*>::clear();
00499   }
00500 
00501   // Writes a vector of (pointers to) classes to the given file. Assumes the
00502   // existence of bool T::Serialize(FILE*) const that returns false in case of
00503   // error. There is no Serialize for simple types, as you would have a
00504   // normal GenericVector of those.
00505   // Returns false in case of error.
00506   bool Serialize(FILE* fp) const {
00507     inT32 used = GenericVector<T*>::size_used_;
00508     if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
00509     for (int i = 0; i < used; ++i) {
00510       inT8 non_null = GenericVector<T*>::data_[i] != NULL;
00511       if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
00512       if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
00513     }
00514     return true;
00515   }
00516   bool Serialize(TFile* fp) const {
00517     inT32 used = GenericVector<T*>::size_used_;
00518     if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
00519     for (int i = 0; i < used; ++i) {
00520       inT8 non_null = GenericVector<T*>::data_[i] != NULL;
00521       if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
00522       if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
00523     }
00524     return true;
00525   }
00526   // Reads a vector of (pointers to) classes to the given file. Assumes the
00527   // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
00528   // case of error. There is no Serialize for simple types, as you would have a
00529   // normal GenericVector of those.
00530   // If swap is true, assumes a big/little-endian swap is needed.
00531   // Also needs T::T(), as new T is used in this function.
00532   // Returns false in case of error.
00533   bool DeSerialize(bool swap, FILE* fp) {
00534     inT32 reserved;
00535     if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
00536     if (swap) Reverse32(&reserved);
00537     GenericVector<T*>::reserve(reserved);
00538     truncate(0);
00539     for (int i = 0; i < reserved; ++i) {
00540       inT8 non_null;
00541       if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
00542       T* item = NULL;
00543       if (non_null) {
00544         item = new T;
00545         if (!item->DeSerialize(swap, fp)) {
00546           delete item;
00547           return false;
00548         }
00549         this->push_back(item);
00550       } else {
00551         // Null elements should keep their place in the vector.
00552         this->push_back(NULL);
00553       }
00554     }
00555     return true;
00556   }
00557   bool DeSerialize(bool swap, TFile* fp) {
00558     inT32 reserved;
00559     if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
00560     if (swap) Reverse32(&reserved);
00561     GenericVector<T*>::reserve(reserved);
00562     truncate(0);
00563     for (int i = 0; i < reserved; ++i) {
00564       inT8 non_null;
00565       if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
00566       T* item = NULL;
00567       if (non_null) {
00568         item = new T;
00569         if (!item->DeSerialize(swap, fp)) {
00570           delete item;
00571           return false;
00572         }
00573         this->push_back(item);
00574       } else {
00575         // Null elements should keep their place in the vector.
00576         this->push_back(NULL);
00577       }
00578     }
00579     return true;
00580   }
00581 
00582   // Sorts the items pointed to by the members of this vector using
00583   // t::operator<().
00584   void sort() {
00585     sort(&sort_ptr_cmp<T>);
00586   }
00587 };
00588 
00589 }  // namespace tesseract
00590 
00591 // A useful vector that uses operator== to do comparisons.
00592 template <typename T>
00593 class GenericVectorEqEq : public GenericVector<T> {
00594  public:
00595   GenericVectorEqEq() {
00596     GenericVector<T>::set_compare_callback(
00597         NewPermanentTessCallback(tesseract::cmp_eq<T>));
00598   }
00599   GenericVectorEqEq(int size) : GenericVector<T>(size) {
00600     GenericVector<T>::set_compare_callback(
00601         NewPermanentTessCallback(tesseract::cmp_eq<T>));
00602   }
00603 };
00604 
00605 template <typename T>
00606 void GenericVector<T>::init(int size) {
00607   size_used_ = 0;
00608   size_reserved_ = 0;
00609   data_ = 0;
00610   clear_cb_ = 0;
00611   compare_cb_ = 0;
00612   reserve(size);
00613 }
00614 
00615 template <typename T>
00616 GenericVector<T>::~GenericVector() {
00617   clear();
00618 }
00619 
00620 // Reserve some memory. If the internal array contains elements, they are
00621 // copied.
00622 template <typename T>
00623 void GenericVector<T>::reserve(int size) {
00624   if (size_reserved_ >= size || size <= 0)
00625     return;
00626   T* new_array = new T[size];
00627   for (int i = 0; i < size_used_; ++i)
00628     new_array[i] = data_[i];
00629   if (data_ != NULL) delete[] data_;
00630   data_ = new_array;
00631   size_reserved_ = size;
00632 }
00633 
00634 template <typename T>
00635 void GenericVector<T>::double_the_size() {
00636   if (size_reserved_ == 0) {
00637     reserve(kDefaultVectorSize);
00638   }
00639   else {
00640     reserve(2 * size_reserved_);
00641   }
00642 }
00643 
00644 // Resizes to size and sets all values to t.
00645 template <typename T>
00646 void GenericVector<T>::init_to_size(int size, T t) {
00647   reserve(size);
00648   size_used_ = size;
00649   for (int i = 0; i < size; ++i)
00650     data_[i] = t;
00651 }
00652 
00653 
00654 // Return the object from an index.
00655 template <typename T>
00656 T &GenericVector<T>::get(int index) const {
00657   ASSERT_HOST(index >= 0 && index < size_used_);
00658   return data_[index];
00659 }
00660 
00661 template <typename T>
00662 T &GenericVector<T>::operator[](int index) const {
00663   assert(index >= 0 && index < size_used_);
00664   return data_[index];
00665 }
00666 
00667 template <typename T>
00668 T &GenericVector<T>::back() const {
00669   ASSERT_HOST(size_used_ > 0);
00670   return data_[size_used_ - 1];
00671 }
00672 // Returns the last object and removes it.
00673 template <typename T>
00674 T GenericVector<T>::pop_back() {
00675   ASSERT_HOST(size_used_ > 0);
00676   return data_[--size_used_];
00677 }
00678 
00679 // Return the object from an index.
00680 template <typename T>
00681 void GenericVector<T>::set(T t, int index) {
00682   ASSERT_HOST(index >= 0 && index < size_used_);
00683   data_[index] = t;
00684 }
00685 
00686 // Shifts the rest of the elements to the right to make
00687 // space for the new elements and inserts the given element
00688 // at the specified index.
00689 template <typename T>
00690 void GenericVector<T>::insert(T t, int index) {
00691   ASSERT_HOST(index >= 0 && index <= size_used_);
00692   if (size_reserved_ == size_used_)
00693     double_the_size();
00694   for (int i = size_used_; i > index; --i) {
00695     data_[i] = data_[i-1];
00696   }
00697   data_[index] = t;
00698   size_used_++;
00699 }
00700 
00701 // Removes an element at the given index and
00702 // shifts the remaining elements to the left.
00703 template <typename T>
00704 void GenericVector<T>::remove(int index) {
00705   ASSERT_HOST(index >= 0 && index < size_used_);
00706   for (int i = index; i < size_used_ - 1; ++i) {
00707     data_[i] = data_[i+1];
00708   }
00709   size_used_--;
00710 }
00711 
00712 // Return true if the index is valindex
00713 template <typename T>
00714 T GenericVector<T>::contains_index(int index) const {
00715   return index >= 0 && index < size_used_;
00716 }
00717 
00718 // Return the index of the T object.
00719 template <typename T>
00720 int GenericVector<T>::get_index(T object) const {
00721   for (int i = 0; i < size_used_; ++i) {
00722     ASSERT_HOST(compare_cb_ != NULL);
00723     if (compare_cb_->Run(object, data_[i]))
00724       return i;
00725   }
00726   return -1;
00727 }
00728 
00729 // Return true if T is in the array
00730 template <typename T>
00731 bool GenericVector<T>::contains(T object) const {
00732   return get_index(object) != -1;
00733 }
00734 
00735 // Add an element in the array
00736 template <typename T>
00737 int GenericVector<T>::push_back(T object) {
00738   int index = 0;
00739   if (size_used_ == size_reserved_)
00740     double_the_size();
00741   index = size_used_++;
00742   data_[index] = object;
00743   return index;
00744 }
00745 
00746 template <typename T>
00747 int GenericVector<T>::push_back_new(T object) {
00748   int index = get_index(object);
00749   if (index >= 0)
00750     return index;
00751   return push_back(object);
00752 }
00753 
00754 // Add an element in the array (front)
00755 template <typename T>
00756 int GenericVector<T>::push_front(T object) {
00757   if (size_used_ == size_reserved_)
00758     double_the_size();
00759   for (int i = size_used_; i > 0; --i)
00760     data_[i] = data_[i-1];
00761   data_[0] = object;
00762   ++size_used_;
00763   return 0;
00764 }
00765 
00766 template <typename T>
00767 void GenericVector<T>::operator+=(T t) {
00768   push_back(t);
00769 }
00770 
00771 template <typename T>
00772 GenericVector<T> &GenericVector<T>::operator+=(const GenericVector& other) {
00773   this->reserve(size_used_ + other.size_used_);
00774   for (int i = 0; i < other.size(); ++i) {
00775     this->operator+=(other.data_[i]);
00776   }
00777   return *this;
00778 }
00779 
00780 template <typename T>
00781 GenericVector<T> &GenericVector<T>::operator=(const GenericVector& other) {
00782   if (&other != this) {
00783     this->truncate(0);
00784     this->operator+=(other);
00785   }
00786   return *this;
00787 }
00788 
00789 // Add a callback to be called to delete the elements when the array took
00790 // their ownership.
00791 template <typename T>
00792 void GenericVector<T>::set_clear_callback(TessCallback1<T>* cb) {
00793   clear_cb_ = cb;
00794 }
00795 
00796 // Add a callback to be called to delete the elements when the array took
00797 // their ownership.
00798 template <typename T>
00799 void GenericVector<T>::set_compare_callback(
00800     TessResultCallback2<bool, T const &, T const &>* cb) {
00801   compare_cb_ = cb;
00802 }
00803 
00804 // Clear the array, calling the callback function if any.
00805 template <typename T>
00806 void GenericVector<T>::clear() {
00807   if (size_reserved_ > 0) {
00808     if (clear_cb_ != NULL)
00809       for (int i = 0; i < size_used_; ++i)
00810         clear_cb_->Run(data_[i]);
00811     delete[] data_;
00812     data_ = NULL;
00813     size_used_ = 0;
00814     size_reserved_ = 0;
00815   }
00816   if (clear_cb_ != NULL) {
00817     delete clear_cb_;
00818     clear_cb_ = NULL;
00819   }
00820   if (compare_cb_ != NULL) {
00821     delete compare_cb_;
00822     compare_cb_ = NULL;
00823   }
00824 }
00825 
00826 template <typename T>
00827 void GenericVector<T>::delete_data_pointers() {
00828   for (int i = 0; i < size_used_; ++i)
00829     if (data_[i]) {
00830       delete data_[i];
00831     }
00832 }
00833 
00834 
00835 template <typename T>
00836 bool GenericVector<T>::write(
00837     FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
00838   if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
00839   if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
00840   if (cb != NULL) {
00841     for (int i = 0; i < size_used_; ++i) {
00842       if (!cb->Run(f, data_[i])) {
00843         delete cb;
00844         return false;
00845       }
00846     }
00847     delete cb;
00848   } else {
00849     if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false;
00850   }
00851   return true;
00852 }
00853 
00854 template <typename T>
00855 bool GenericVector<T>::read(FILE* f,
00856                             TessResultCallback3<bool, FILE*, T*, bool>* cb,
00857                             bool swap) {
00858   inT32 reserved;
00859   if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false;
00860   if (swap) Reverse32(&reserved);
00861   reserve(reserved);
00862   if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
00863   if (swap) Reverse32(&size_used_);
00864   if (cb != NULL) {
00865     for (int i = 0; i < size_used_; ++i) {
00866       if (!cb->Run(f, data_ + i, swap)) {
00867         delete cb;
00868         return false;
00869       }
00870     }
00871     delete cb;
00872   } else {
00873     if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false;
00874     if (swap) {
00875       for (int i = 0; i < size_used_; ++i)
00876         ReverseN(&data_[i], sizeof(T));
00877     }
00878   }
00879   return true;
00880 }
00881 
00882 // Writes a vector of simple types to the given file. Assumes that bitwise
00883 // read/write of T will work. Returns false in case of error.
00884 template <typename T>
00885 bool GenericVector<T>::Serialize(FILE* fp) const {
00886   if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
00887   if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false;
00888   return true;
00889 }
00890 template <typename T>
00891 bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
00892   if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
00893   if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
00894   return true;
00895 }
00896 
00897 // Reads a vector of simple types from the given file. Assumes that bitwise
00898 // read/write will work with ReverseN according to sizeof(T).
00899 // Returns false in case of error.
00900 // If swap is true, assumes a big/little-endian swap is needed.
00901 template <typename T>
00902 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
00903   inT32 reserved;
00904   if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
00905   if (swap) Reverse32(&reserved);
00906   reserve(reserved);
00907   size_used_ = reserved;
00908   if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false;
00909   if (swap) {
00910     for (int i = 0; i < size_used_; ++i)
00911       ReverseN(&data_[i], sizeof(data_[i]));
00912   }
00913   return true;
00914 }
00915 template <typename T>
00916 bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) {
00917   inT32 reserved;
00918   if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
00919   if (swap) Reverse32(&reserved);
00920   reserve(reserved);
00921   size_used_ = reserved;
00922   if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
00923   if (swap) {
00924     for (int i = 0; i < size_used_; ++i)
00925       ReverseN(&data_[i], sizeof(data_[i]));
00926   }
00927   return true;
00928 }
00929 
00930 // Writes a vector of classes to the given file. Assumes the existence of
00931 // bool T::Serialize(FILE* fp) const that returns false in case of error.
00932 // Returns false in case of error.
00933 template <typename T>
00934 bool GenericVector<T>::SerializeClasses(FILE* fp) const {
00935   if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
00936   for (int i = 0; i < size_used_; ++i) {
00937     if (!data_[i].Serialize(fp)) return false;
00938   }
00939   return true;
00940 }
00941 template <typename T>
00942 bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const {
00943   if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
00944   for (int i = 0; i < size_used_; ++i) {
00945     if (!data_[i].Serialize(fp)) return false;
00946   }
00947   return true;
00948 }
00949 
00950 // Reads a vector of classes from the given file. Assumes the existence of
00951 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
00952 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
00953 // this function. Returns false in case of error.
00954 // If swap is true, assumes a big/little-endian swap is needed.
00955 template <typename T>
00956 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
00957   uinT32 reserved;
00958   if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
00959   if (swap) Reverse32(&reserved);
00960   T empty;
00961   init_to_size(reserved, empty);
00962   for (int i = 0; i < reserved; ++i) {
00963     if (!data_[i].DeSerialize(swap, fp)) return false;
00964   }
00965   return true;
00966 }
00967 template <typename T>
00968 bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) {
00969   uinT32 reserved;
00970   if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
00971   if (swap) Reverse32(&reserved);
00972   T empty;
00973   init_to_size(reserved, empty);
00974   for (int i = 0; i < reserved; ++i) {
00975     if (!data_[i].DeSerialize(swap, fp)) return false;
00976   }
00977   return true;
00978 }
00979 
00980 // This method clear the current object, then, does a shallow copy of
00981 // its argument, and finally invalidates its argument.
00982 template <typename T>
00983 void GenericVector<T>::move(GenericVector<T>* from) {
00984   this->clear();
00985   this->data_ = from->data_;
00986   this->size_reserved_ = from->size_reserved_;
00987   this->size_used_ = from->size_used_;
00988   this->compare_cb_ = from->compare_cb_;
00989   this->clear_cb_ = from->clear_cb_;
00990   from->data_ = NULL;
00991   from->clear_cb_ = NULL;
00992   from->compare_cb_ = NULL;
00993   from->size_used_ = 0;
00994   from->size_reserved_ = 0;
00995 }
00996 
00997 template <typename T>
00998 void GenericVector<T>::sort() {
00999   sort(&tesseract::sort_cmp<T>);
01000 }
01001 
01002 // Internal recursive version of choose_nth_item.
01003 // The algorithm used comes from "Algorithms" by Sedgewick:
01004 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
01005 // The principle is to choose a random pivot, and move everything less than
01006 // the pivot to its left, and everything greater than the pivot to the end
01007 // of the array, then recurse on the part that contains the desired index, or
01008 // just return the answer if it is in the equal section in the middle.
01009 // The random pivot guarantees average linear time for the same reason that
01010 // n times vector::push_back takes linear time on average.
01011 // target_index, start and and end are all indices into the full array.
01012 // Seed is a seed for rand_r for thread safety purposes. Its value is
01013 // unimportant as the random numbers do not affect the result except
01014 // between equal answers.
01015 template <typename T>
01016 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
01017                                       unsigned int* seed) {
01018   // Number of elements to process.
01019   int num_elements = end - start;
01020   // Trivial cases.
01021   if (num_elements <= 1)
01022     return start;
01023   if (num_elements == 2) {
01024     if (data_[start] < data_[start + 1]) {
01025       return target_index > start ? start + 1 : start;
01026     } else {
01027       return target_index > start ? start : start + 1;
01028     }
01029   }
01030   // Place the pivot at start.
01031   #ifndef rand_r  // _MSC_VER, ANDROID
01032   srand(*seed);
01033   #define rand_r(seed) rand()
01034   #endif  // _MSC_VER
01035   int pivot = rand_r(seed) % num_elements + start;
01036   swap(pivot, start);
01037   // The invariant condition here is that items [start, next_lesser) are less
01038   // than the pivot (which is at index next_lesser) and items
01039   // [prev_greater, end) are greater than the pivot, with items
01040   // [next_lesser, prev_greater) being equal to the pivot.
01041   int next_lesser = start;
01042   int prev_greater = end;
01043   for (int next_sample = start + 1; next_sample < prev_greater;) {
01044     if (data_[next_sample] < data_[next_lesser]) {
01045       swap(next_lesser++, next_sample++);
01046     } else if (data_[next_sample] == data_[next_lesser]) {
01047       ++next_sample;
01048     } else {
01049       swap(--prev_greater, next_sample);
01050     }
01051   }
01052   // Now the invariant is set up, we recurse on just the section that contains
01053   // the desired index.
01054   if (target_index < next_lesser)
01055     return choose_nth_item(target_index, start, next_lesser, seed);
01056   else if (target_index < prev_greater)
01057     return next_lesser;          // In equal bracket.
01058   else
01059     return choose_nth_item(target_index, prev_greater, end, seed);
01060 }
01061 
01062 
01063 #endif  // TESSERACT_CCUTIL_GENERICVECTOR_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines