|
tesseract 3.04.01
|
00001 00002 // File: genericvector.h 00003 // Description: Generic vector class 00004 // Author: Daria Antonova 00005 // Created: Mon Jun 23 11:26:43 PDT 2008 00006 // 00007 // (C) Copyright 2007, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 // 00020 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_ 00021 #define TESSERACT_CCUTIL_GENERICVECTOR_H_ 00022 00023 #include <assert.h> 00024 #include <stdio.h> 00025 #include <stdlib.h> 00026 00027 #include "tesscallback.h" 00028 #include "errcode.h" 00029 #include "helpers.h" 00030 #include "ndminx.h" 00031 #include "serialis.h" 00032 #include "strngs.h" 00033 00034 // Use PointerVector<T> below in preference to GenericVector<T*>, as that 00035 // provides automatic deletion of pointers, [De]Serialize that works, and 00036 // sort that works. 00037 template <typename T> 00038 class GenericVector { 00039 public: 00040 GenericVector() { 00041 init(kDefaultVectorSize); 00042 } 00043 GenericVector(int size, T init_val) { 00044 init(size); 00045 init_to_size(size, init_val); 00046 } 00047 00048 // Copy 00049 GenericVector(const GenericVector& other) { 00050 this->init(other.size()); 00051 this->operator+=(other); 00052 } 00053 GenericVector<T> &operator+=(const GenericVector& other); 00054 GenericVector<T> &operator=(const GenericVector& other); 00055 00056 ~GenericVector(); 00057 00058 // Reserve some memory. 00059 void reserve(int size); 00060 // Double the size of the internal array. 00061 void double_the_size(); 00062 00063 // Resizes to size and sets all values to t. 00064 void init_to_size(int size, T t); 00065 // Resizes to size without any initialization. 00066 void resize_no_init(int size) { 00067 reserve(size); 00068 size_used_ = size; 00069 } 00070 00071 // Return the size used. 00072 int size() const { 00073 return size_used_; 00074 } 00075 int size_reserved() const { 00076 return size_reserved_; 00077 } 00078 00079 int length() const { 00080 return size_used_; 00081 } 00082 00083 // Return true if empty. 00084 bool empty() const { 00085 return size_used_ == 0; 00086 } 00087 00088 // Return the object from an index. 00089 T &get(int index) const; 00090 T &back() const; 00091 T &operator[](int index) const; 00092 // Returns the last object and removes it. 00093 T pop_back(); 00094 00095 // Return the index of the T object. 00096 // This method NEEDS a compare_callback to be passed to 00097 // set_compare_callback. 00098 int get_index(T object) const; 00099 00100 // Return true if T is in the array 00101 bool contains(T object) const; 00102 00103 // Return true if the index is valid 00104 T contains_index(int index) const; 00105 00106 // Push an element in the end of the array 00107 int push_back(T object); 00108 void operator+=(T t); 00109 00110 // Push an element in the end of the array if the same 00111 // element is not already contained in the array. 00112 int push_back_new(T object); 00113 00114 // Push an element in the front of the array 00115 // Note: This function is O(n) 00116 int push_front(T object); 00117 00118 // Set the value at the given index 00119 void set(T t, int index); 00120 00121 // Insert t at the given index, push other elements to the right. 00122 void insert(T t, int index); 00123 00124 // Removes an element at the given index and 00125 // shifts the remaining elements to the left. 00126 void remove(int index); 00127 00128 // Truncates the array to the given size by removing the end. 00129 // If the current size is less, the array is not expanded. 00130 void truncate(int size) { 00131 if (size < size_used_) 00132 size_used_ = size; 00133 } 00134 00135 // Add a callback to be called to delete the elements when the array took 00136 // their ownership. 00137 void set_clear_callback(TessCallback1<T>* cb); 00138 00139 // Add a callback to be called to compare the elements when needed (contains, 00140 // get_id, ...) 00141 void set_compare_callback(TessResultCallback2<bool, T const &, T const &>* cb); 00142 00143 // Clear the array, calling the clear callback function if any. 00144 // All the owned callbacks are also deleted. 00145 // If you don't want the callbacks to be deleted, before calling clear, set 00146 // the callback to NULL. 00147 void clear(); 00148 00149 // Delete objects pointed to by data_[i] 00150 void delete_data_pointers(); 00151 00152 // This method clears the current object, then, does a shallow copy of 00153 // its argument, and finally invalidates its argument. 00154 // Callbacks are moved to the current object; 00155 void move(GenericVector<T>* from); 00156 00157 // Read/Write the array to a file. This does _NOT_ read/write the callbacks. 00158 // The callback given must be permanent since they will be called more than 00159 // once. The given callback will be deleted at the end. 00160 // If the callbacks are NULL, then the data is simply read/written using 00161 // fread (and swapping)/fwrite. 00162 // Returns false on error or if the callback returns false. 00163 // DEPRECATED. Use [De]Serialize[Classes] instead. 00164 bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const; 00165 bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap); 00166 // Writes a vector of simple types to the given file. Assumes that bitwise 00167 // read/write of T will work. Returns false in case of error. 00168 // TODO(rays) Change all callers to use TFile and remove deprecated methods. 00169 bool Serialize(FILE* fp) const; 00170 bool Serialize(tesseract::TFile* fp) const; 00171 // Reads a vector of simple types from the given file. Assumes that bitwise 00172 // read/write will work with ReverseN according to sizeof(T). 00173 // Returns false in case of error. 00174 // If swap is true, assumes a big/little-endian swap is needed. 00175 bool DeSerialize(bool swap, FILE* fp); 00176 bool DeSerialize(bool swap, tesseract::TFile* fp); 00177 // Writes a vector of classes to the given file. Assumes the existence of 00178 // bool T::Serialize(FILE* fp) const that returns false in case of error. 00179 // Returns false in case of error. 00180 bool SerializeClasses(FILE* fp) const; 00181 bool SerializeClasses(tesseract::TFile* fp) const; 00182 // Reads a vector of classes from the given file. Assumes the existence of 00183 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of 00184 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in 00185 // this function. Returns false in case of error. 00186 // If swap is true, assumes a big/little-endian swap is needed. 00187 bool DeSerializeClasses(bool swap, FILE* fp); 00188 bool DeSerializeClasses(bool swap, tesseract::TFile* fp); 00189 00190 // Allocates a new array of double the current_size, copies over the 00191 // information from data to the new location, deletes data and returns 00192 // the pointed to the new larger array. 00193 // This function uses memcpy to copy the data, instead of invoking 00194 // operator=() for each element like double_the_size() does. 00195 static T *double_the_size_memcpy(int current_size, T *data) { 00196 T *data_new = new T[current_size * 2]; 00197 memcpy(data_new, data, sizeof(T) * current_size); 00198 delete[] data; 00199 return data_new; 00200 } 00201 00202 // Reverses the elements of the vector. 00203 void reverse() { 00204 for (int i = 0; i < size_used_ / 2; ++i) 00205 Swap(&data_[i], &data_[size_used_ - 1 - i]); 00206 } 00207 00208 // Sorts the members of this vector using the less than comparator (cmp_lt), 00209 // which compares the values. Useful for GenericVectors to primitive types. 00210 // Will not work so great for pointers (unless you just want to sort some 00211 // pointers). You need to provide a specialization to sort_cmp to use 00212 // your type. 00213 void sort(); 00214 00215 // Sort the array into the order defined by the qsort function comparator. 00216 // The comparator function is as defined by qsort, ie. it receives pointers 00217 // to two Ts and returns negative if the first element is to appear earlier 00218 // in the result and positive if it is to appear later, with 0 for equal. 00219 void sort(int (*comparator)(const void*, const void*)) { 00220 qsort(data_, size_used_, sizeof(*data_), comparator); 00221 } 00222 00223 // Searches the array (assuming sorted in ascending order, using sort()) for 00224 // an element equal to target and returns true if it is present. 00225 // Use binary_search to get the index of target, or its nearest candidate. 00226 bool bool_binary_search(const T& target) const { 00227 int index = binary_search(target); 00228 if (index >= size_used_) 00229 return false; 00230 return data_[index] == target; 00231 } 00232 // Searches the array (assuming sorted in ascending order, using sort()) for 00233 // an element equal to target and returns the index of the best candidate. 00234 // The return value is conceptually the largest index i such that 00235 // data_[i] <= target or 0 if target < the whole vector. 00236 // NOTE that this function uses operator> so really the return value is 00237 // the largest index i such that data_[i] > target is false. 00238 int binary_search(const T& target) const { 00239 int bottom = 0; 00240 int top = size_used_; 00241 do { 00242 int middle = (bottom + top) / 2; 00243 if (data_[middle] > target) 00244 top = middle; 00245 else 00246 bottom = middle; 00247 } 00248 while (top - bottom > 1); 00249 return bottom; 00250 } 00251 00252 // Compact the vector by deleting elements using operator!= on basic types. 00253 // The vector must be sorted. 00254 void compact_sorted() { 00255 if (size_used_ == 0) 00256 return; 00257 00258 // First element is in no matter what, hence the i = 1. 00259 int last_write = 0; 00260 for (int i = 1; i < size_used_; ++i) { 00261 // Finds next unique item and writes it. 00262 if (data_[last_write] != data_[i]) 00263 data_[++last_write] = data_[i]; 00264 } 00265 // last_write is the index of a valid data cell, so add 1. 00266 size_used_ = last_write + 1; 00267 } 00268 00269 // Compact the vector by deleting elements for which delete_cb returns 00270 // true. delete_cb is a permanent callback and will be deleted. 00271 void compact(TessResultCallback1<bool, int>* delete_cb) { 00272 int new_size = 0; 00273 int old_index = 0; 00274 // Until the callback returns true, the elements stay the same. 00275 while (old_index < size_used_ && !delete_cb->Run(old_index++)) 00276 ++new_size; 00277 // Now just copy anything else that gets false from delete_cb. 00278 for (; old_index < size_used_; ++old_index) { 00279 if (!delete_cb->Run(old_index)) { 00280 data_[new_size++] = data_[old_index]; 00281 } 00282 } 00283 size_used_ = new_size; 00284 delete delete_cb; 00285 } 00286 00287 T dot_product(const GenericVector<T>& other) const { 00288 T result = static_cast<T>(0); 00289 for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i) 00290 result += data_[i] * other.data_[i]; 00291 return result; 00292 } 00293 00294 // Returns the index of what would be the target_index_th item in the array 00295 // if the members were sorted, without actually sorting. Members are 00296 // shuffled around, but it takes O(n) time. 00297 // NOTE: uses operator< and operator== on the members. 00298 int choose_nth_item(int target_index) { 00299 // Make sure target_index is legal. 00300 if (target_index < 0) 00301 target_index = 0; // ensure legal 00302 else if (target_index >= size_used_) 00303 target_index = size_used_ - 1; 00304 unsigned int seed = 1; 00305 return choose_nth_item(target_index, 0, size_used_, &seed); 00306 } 00307 00308 // Swaps the elements with the given indices. 00309 void swap(int index1, int index2) { 00310 if (index1 != index2) { 00311 T tmp = data_[index1]; 00312 data_[index1] = data_[index2]; 00313 data_[index2] = tmp; 00314 } 00315 } 00316 // Returns true if all elements of *this are within the given range. 00317 // Only uses operator< 00318 bool WithinBounds(const T& rangemin, const T& rangemax) const { 00319 for (int i = 0; i < size_used_; ++i) { 00320 if (data_[i] < rangemin || rangemax < data_[i]) 00321 return false; 00322 } 00323 return true; 00324 } 00325 00326 protected: 00327 // Internal recursive version of choose_nth_item. 00328 int choose_nth_item(int target_index, int start, int end, unsigned int* seed); 00329 00330 // Init the object, allocating size memory. 00331 void init(int size); 00332 00333 // We are assuming that the object generally placed in thie 00334 // vector are small enough that for efficiency it makes sense 00335 // to start with a larger initial size. 00336 static const int kDefaultVectorSize = 4; 00337 inT32 size_used_; 00338 inT32 size_reserved_; 00339 T* data_; 00340 TessCallback1<T>* clear_cb_; 00341 // Mutable because Run method is not const 00342 mutable TessResultCallback2<bool, T const &, T const &>* compare_cb_; 00343 }; 00344 00345 namespace tesseract { 00346 00347 // Function to read a GenericVector<char> from a whole file. 00348 // Returns false on failure. 00349 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data); 00350 // Function to write a GenericVector<char> to a whole file. 00351 // Returns false on failure. 00352 typedef bool (*FileWriter)(const GenericVector<char>& data, 00353 const STRING& filename); 00354 // The default FileReader loads the whole file into the vector of char, 00355 // returning false on error. 00356 inline bool LoadDataFromFile(const STRING& filename, 00357 GenericVector<char>* data) { 00358 FILE* fp = fopen(filename.string(), "rb"); 00359 if (fp == NULL) return false; 00360 fseek(fp, 0, SEEK_END); 00361 size_t size = ftell(fp); 00362 fseek(fp, 0, SEEK_SET); 00363 // Pad with a 0, just in case we treat the result as a string. 00364 data->init_to_size((int)size + 1, 0); 00365 bool result = fread(&(*data)[0], 1, size, fp) == size; 00366 fclose(fp); 00367 return result; 00368 } 00369 // The default FileWriter writes the vector of char to the filename file, 00370 // returning false on error. 00371 inline bool SaveDataToFile(const GenericVector<char>& data, 00372 const STRING& filename) { 00373 FILE* fp = fopen(filename.string(), "wb"); 00374 if (fp == NULL) return false; 00375 bool result = 00376 static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size(); 00377 fclose(fp); 00378 return result; 00379 } 00380 00381 template <typename T> 00382 bool cmp_eq(T const & t1, T const & t2) { 00383 return t1 == t2; 00384 } 00385 00386 // Used by sort() 00387 // return < 0 if t1 < t2 00388 // return 0 if t1 == t2 00389 // return > 0 if t1 > t2 00390 template <typename T> 00391 int sort_cmp(const void* t1, const void* t2) { 00392 const T* a = static_cast<const T *> (t1); 00393 const T* b = static_cast<const T *> (t2); 00394 if (*a < *b) { 00395 return -1; 00396 } else if (*b < *a) { 00397 return 1; 00398 } else { 00399 return 0; 00400 } 00401 } 00402 00403 // Used by PointerVector::sort() 00404 // return < 0 if t1 < t2 00405 // return 0 if t1 == t2 00406 // return > 0 if t1 > t2 00407 template <typename T> 00408 int sort_ptr_cmp(const void* t1, const void* t2) { 00409 const T* a = *reinterpret_cast<T * const *>(t1); 00410 const T* b = *reinterpret_cast<T * const *>(t2); 00411 if (*a < *b) { 00412 return -1; 00413 } else if (*b < *a) { 00414 return 1; 00415 } else { 00416 return 0; 00417 } 00418 } 00419 00420 // Subclass for a vector of pointers. Use in preference to GenericVector<T*> 00421 // as it provides automatic deletion and correct serialization, with the 00422 // corollary that all copy operations are deep copies of the pointed-to objects. 00423 template<typename T> 00424 class PointerVector : public GenericVector<T*> { 00425 public: 00426 PointerVector() : GenericVector<T*>() { } 00427 explicit PointerVector(int size) : GenericVector<T*>(size) { } 00428 ~PointerVector() { 00429 // Clear must be called here, even though it is called again by the base, 00430 // as the base will call the wrong clear. 00431 clear(); 00432 } 00433 // Copy must be deep, as the pointers will be automatically deleted on 00434 // destruction. 00435 PointerVector(const PointerVector& other) : GenericVector<T*>(other) { 00436 this->init(other.size()); 00437 this->operator+=(other); 00438 } 00439 PointerVector<T>& operator+=(const PointerVector& other) { 00440 this->reserve(this->size_used_ + other.size_used_); 00441 for (int i = 0; i < other.size(); ++i) { 00442 this->push_back(new T(*other.data_[i])); 00443 } 00444 return *this; 00445 } 00446 00447 PointerVector<T>& operator=(const PointerVector& other) { 00448 if (&other != this) { 00449 this->truncate(0); 00450 this->operator+=(other); 00451 } 00452 return *this; 00453 } 00454 00455 // Removes an element at the given index and 00456 // shifts the remaining elements to the left. 00457 void remove(int index) { 00458 delete GenericVector<T*>::data_[index]; 00459 GenericVector<T*>::remove(index); 00460 } 00461 00462 // Truncates the array to the given size by removing the end. 00463 // If the current size is less, the array is not expanded. 00464 void truncate(int size) { 00465 for (int i = size; i < GenericVector<T*>::size_used_; ++i) 00466 delete GenericVector<T*>::data_[i]; 00467 GenericVector<T*>::truncate(size); 00468 } 00469 00470 // Compact the vector by deleting elements for which delete_cb returns 00471 // true. delete_cb is a permanent callback and will be deleted. 00472 void compact(TessResultCallback1<bool, const T*>* delete_cb) { 00473 int new_size = 0; 00474 int old_index = 0; 00475 // Until the callback returns true, the elements stay the same. 00476 while (old_index < GenericVector<T*>::size_used_ && 00477 !delete_cb->Run(GenericVector<T*>::data_[old_index++])) 00478 ++new_size; 00479 // Now just copy anything else that gets false from delete_cb. 00480 for (; old_index < GenericVector<T*>::size_used_; ++old_index) { 00481 if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) { 00482 GenericVector<T*>::data_[new_size++] = 00483 GenericVector<T*>::data_[old_index]; 00484 } else { 00485 delete GenericVector<T*>::data_[old_index]; 00486 } 00487 } 00488 GenericVector<T*>::size_used_ = new_size; 00489 delete delete_cb; 00490 } 00491 00492 // Clear the array, calling the clear callback function if any. 00493 // All the owned callbacks are also deleted. 00494 // If you don't want the callbacks to be deleted, before calling clear, set 00495 // the callback to NULL. 00496 void clear() { 00497 GenericVector<T*>::delete_data_pointers(); 00498 GenericVector<T*>::clear(); 00499 } 00500 00501 // Writes a vector of (pointers to) classes to the given file. Assumes the 00502 // existence of bool T::Serialize(FILE*) const that returns false in case of 00503 // error. There is no Serialize for simple types, as you would have a 00504 // normal GenericVector of those. 00505 // Returns false in case of error. 00506 bool Serialize(FILE* fp) const { 00507 inT32 used = GenericVector<T*>::size_used_; 00508 if (fwrite(&used, sizeof(used), 1, fp) != 1) return false; 00509 for (int i = 0; i < used; ++i) { 00510 inT8 non_null = GenericVector<T*>::data_[i] != NULL; 00511 if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false; 00512 if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false; 00513 } 00514 return true; 00515 } 00516 bool Serialize(TFile* fp) const { 00517 inT32 used = GenericVector<T*>::size_used_; 00518 if (fp->FWrite(&used, sizeof(used), 1) != 1) return false; 00519 for (int i = 0; i < used; ++i) { 00520 inT8 non_null = GenericVector<T*>::data_[i] != NULL; 00521 if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false; 00522 if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false; 00523 } 00524 return true; 00525 } 00526 // Reads a vector of (pointers to) classes to the given file. Assumes the 00527 // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in 00528 // case of error. There is no Serialize for simple types, as you would have a 00529 // normal GenericVector of those. 00530 // If swap is true, assumes a big/little-endian swap is needed. 00531 // Also needs T::T(), as new T is used in this function. 00532 // Returns false in case of error. 00533 bool DeSerialize(bool swap, FILE* fp) { 00534 inT32 reserved; 00535 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; 00536 if (swap) Reverse32(&reserved); 00537 GenericVector<T*>::reserve(reserved); 00538 truncate(0); 00539 for (int i = 0; i < reserved; ++i) { 00540 inT8 non_null; 00541 if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false; 00542 T* item = NULL; 00543 if (non_null) { 00544 item = new T; 00545 if (!item->DeSerialize(swap, fp)) { 00546 delete item; 00547 return false; 00548 } 00549 this->push_back(item); 00550 } else { 00551 // Null elements should keep their place in the vector. 00552 this->push_back(NULL); 00553 } 00554 } 00555 return true; 00556 } 00557 bool DeSerialize(bool swap, TFile* fp) { 00558 inT32 reserved; 00559 if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; 00560 if (swap) Reverse32(&reserved); 00561 GenericVector<T*>::reserve(reserved); 00562 truncate(0); 00563 for (int i = 0; i < reserved; ++i) { 00564 inT8 non_null; 00565 if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; 00566 T* item = NULL; 00567 if (non_null) { 00568 item = new T; 00569 if (!item->DeSerialize(swap, fp)) { 00570 delete item; 00571 return false; 00572 } 00573 this->push_back(item); 00574 } else { 00575 // Null elements should keep their place in the vector. 00576 this->push_back(NULL); 00577 } 00578 } 00579 return true; 00580 } 00581 00582 // Sorts the items pointed to by the members of this vector using 00583 // t::operator<(). 00584 void sort() { 00585 sort(&sort_ptr_cmp<T>); 00586 } 00587 }; 00588 00589 } // namespace tesseract 00590 00591 // A useful vector that uses operator== to do comparisons. 00592 template <typename T> 00593 class GenericVectorEqEq : public GenericVector<T> { 00594 public: 00595 GenericVectorEqEq() { 00596 GenericVector<T>::set_compare_callback( 00597 NewPermanentTessCallback(tesseract::cmp_eq<T>)); 00598 } 00599 GenericVectorEqEq(int size) : GenericVector<T>(size) { 00600 GenericVector<T>::set_compare_callback( 00601 NewPermanentTessCallback(tesseract::cmp_eq<T>)); 00602 } 00603 }; 00604 00605 template <typename T> 00606 void GenericVector<T>::init(int size) { 00607 size_used_ = 0; 00608 size_reserved_ = 0; 00609 data_ = 0; 00610 clear_cb_ = 0; 00611 compare_cb_ = 0; 00612 reserve(size); 00613 } 00614 00615 template <typename T> 00616 GenericVector<T>::~GenericVector() { 00617 clear(); 00618 } 00619 00620 // Reserve some memory. If the internal array contains elements, they are 00621 // copied. 00622 template <typename T> 00623 void GenericVector<T>::reserve(int size) { 00624 if (size_reserved_ >= size || size <= 0) 00625 return; 00626 T* new_array = new T[size]; 00627 for (int i = 0; i < size_used_; ++i) 00628 new_array[i] = data_[i]; 00629 if (data_ != NULL) delete[] data_; 00630 data_ = new_array; 00631 size_reserved_ = size; 00632 } 00633 00634 template <typename T> 00635 void GenericVector<T>::double_the_size() { 00636 if (size_reserved_ == 0) { 00637 reserve(kDefaultVectorSize); 00638 } 00639 else { 00640 reserve(2 * size_reserved_); 00641 } 00642 } 00643 00644 // Resizes to size and sets all values to t. 00645 template <typename T> 00646 void GenericVector<T>::init_to_size(int size, T t) { 00647 reserve(size); 00648 size_used_ = size; 00649 for (int i = 0; i < size; ++i) 00650 data_[i] = t; 00651 } 00652 00653 00654 // Return the object from an index. 00655 template <typename T> 00656 T &GenericVector<T>::get(int index) const { 00657 ASSERT_HOST(index >= 0 && index < size_used_); 00658 return data_[index]; 00659 } 00660 00661 template <typename T> 00662 T &GenericVector<T>::operator[](int index) const { 00663 assert(index >= 0 && index < size_used_); 00664 return data_[index]; 00665 } 00666 00667 template <typename T> 00668 T &GenericVector<T>::back() const { 00669 ASSERT_HOST(size_used_ > 0); 00670 return data_[size_used_ - 1]; 00671 } 00672 // Returns the last object and removes it. 00673 template <typename T> 00674 T GenericVector<T>::pop_back() { 00675 ASSERT_HOST(size_used_ > 0); 00676 return data_[--size_used_]; 00677 } 00678 00679 // Return the object from an index. 00680 template <typename T> 00681 void GenericVector<T>::set(T t, int index) { 00682 ASSERT_HOST(index >= 0 && index < size_used_); 00683 data_[index] = t; 00684 } 00685 00686 // Shifts the rest of the elements to the right to make 00687 // space for the new elements and inserts the given element 00688 // at the specified index. 00689 template <typename T> 00690 void GenericVector<T>::insert(T t, int index) { 00691 ASSERT_HOST(index >= 0 && index <= size_used_); 00692 if (size_reserved_ == size_used_) 00693 double_the_size(); 00694 for (int i = size_used_; i > index; --i) { 00695 data_[i] = data_[i-1]; 00696 } 00697 data_[index] = t; 00698 size_used_++; 00699 } 00700 00701 // Removes an element at the given index and 00702 // shifts the remaining elements to the left. 00703 template <typename T> 00704 void GenericVector<T>::remove(int index) { 00705 ASSERT_HOST(index >= 0 && index < size_used_); 00706 for (int i = index; i < size_used_ - 1; ++i) { 00707 data_[i] = data_[i+1]; 00708 } 00709 size_used_--; 00710 } 00711 00712 // Return true if the index is valindex 00713 template <typename T> 00714 T GenericVector<T>::contains_index(int index) const { 00715 return index >= 0 && index < size_used_; 00716 } 00717 00718 // Return the index of the T object. 00719 template <typename T> 00720 int GenericVector<T>::get_index(T object) const { 00721 for (int i = 0; i < size_used_; ++i) { 00722 ASSERT_HOST(compare_cb_ != NULL); 00723 if (compare_cb_->Run(object, data_[i])) 00724 return i; 00725 } 00726 return -1; 00727 } 00728 00729 // Return true if T is in the array 00730 template <typename T> 00731 bool GenericVector<T>::contains(T object) const { 00732 return get_index(object) != -1; 00733 } 00734 00735 // Add an element in the array 00736 template <typename T> 00737 int GenericVector<T>::push_back(T object) { 00738 int index = 0; 00739 if (size_used_ == size_reserved_) 00740 double_the_size(); 00741 index = size_used_++; 00742 data_[index] = object; 00743 return index; 00744 } 00745 00746 template <typename T> 00747 int GenericVector<T>::push_back_new(T object) { 00748 int index = get_index(object); 00749 if (index >= 0) 00750 return index; 00751 return push_back(object); 00752 } 00753 00754 // Add an element in the array (front) 00755 template <typename T> 00756 int GenericVector<T>::push_front(T object) { 00757 if (size_used_ == size_reserved_) 00758 double_the_size(); 00759 for (int i = size_used_; i > 0; --i) 00760 data_[i] = data_[i-1]; 00761 data_[0] = object; 00762 ++size_used_; 00763 return 0; 00764 } 00765 00766 template <typename T> 00767 void GenericVector<T>::operator+=(T t) { 00768 push_back(t); 00769 } 00770 00771 template <typename T> 00772 GenericVector<T> &GenericVector<T>::operator+=(const GenericVector& other) { 00773 this->reserve(size_used_ + other.size_used_); 00774 for (int i = 0; i < other.size(); ++i) { 00775 this->operator+=(other.data_[i]); 00776 } 00777 return *this; 00778 } 00779 00780 template <typename T> 00781 GenericVector<T> &GenericVector<T>::operator=(const GenericVector& other) { 00782 if (&other != this) { 00783 this->truncate(0); 00784 this->operator+=(other); 00785 } 00786 return *this; 00787 } 00788 00789 // Add a callback to be called to delete the elements when the array took 00790 // their ownership. 00791 template <typename T> 00792 void GenericVector<T>::set_clear_callback(TessCallback1<T>* cb) { 00793 clear_cb_ = cb; 00794 } 00795 00796 // Add a callback to be called to delete the elements when the array took 00797 // their ownership. 00798 template <typename T> 00799 void GenericVector<T>::set_compare_callback( 00800 TessResultCallback2<bool, T const &, T const &>* cb) { 00801 compare_cb_ = cb; 00802 } 00803 00804 // Clear the array, calling the callback function if any. 00805 template <typename T> 00806 void GenericVector<T>::clear() { 00807 if (size_reserved_ > 0) { 00808 if (clear_cb_ != NULL) 00809 for (int i = 0; i < size_used_; ++i) 00810 clear_cb_->Run(data_[i]); 00811 delete[] data_; 00812 data_ = NULL; 00813 size_used_ = 0; 00814 size_reserved_ = 0; 00815 } 00816 if (clear_cb_ != NULL) { 00817 delete clear_cb_; 00818 clear_cb_ = NULL; 00819 } 00820 if (compare_cb_ != NULL) { 00821 delete compare_cb_; 00822 compare_cb_ = NULL; 00823 } 00824 } 00825 00826 template <typename T> 00827 void GenericVector<T>::delete_data_pointers() { 00828 for (int i = 0; i < size_used_; ++i) 00829 if (data_[i]) { 00830 delete data_[i]; 00831 } 00832 } 00833 00834 00835 template <typename T> 00836 bool GenericVector<T>::write( 00837 FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const { 00838 if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false; 00839 if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false; 00840 if (cb != NULL) { 00841 for (int i = 0; i < size_used_; ++i) { 00842 if (!cb->Run(f, data_[i])) { 00843 delete cb; 00844 return false; 00845 } 00846 } 00847 delete cb; 00848 } else { 00849 if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false; 00850 } 00851 return true; 00852 } 00853 00854 template <typename T> 00855 bool GenericVector<T>::read(FILE* f, 00856 TessResultCallback3<bool, FILE*, T*, bool>* cb, 00857 bool swap) { 00858 inT32 reserved; 00859 if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false; 00860 if (swap) Reverse32(&reserved); 00861 reserve(reserved); 00862 if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false; 00863 if (swap) Reverse32(&size_used_); 00864 if (cb != NULL) { 00865 for (int i = 0; i < size_used_; ++i) { 00866 if (!cb->Run(f, data_ + i, swap)) { 00867 delete cb; 00868 return false; 00869 } 00870 } 00871 delete cb; 00872 } else { 00873 if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false; 00874 if (swap) { 00875 for (int i = 0; i < size_used_; ++i) 00876 ReverseN(&data_[i], sizeof(T)); 00877 } 00878 } 00879 return true; 00880 } 00881 00882 // Writes a vector of simple types to the given file. Assumes that bitwise 00883 // read/write of T will work. Returns false in case of error. 00884 template <typename T> 00885 bool GenericVector<T>::Serialize(FILE* fp) const { 00886 if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; 00887 if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false; 00888 return true; 00889 } 00890 template <typename T> 00891 bool GenericVector<T>::Serialize(tesseract::TFile* fp) const { 00892 if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false; 00893 if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false; 00894 return true; 00895 } 00896 00897 // Reads a vector of simple types from the given file. Assumes that bitwise 00898 // read/write will work with ReverseN according to sizeof(T). 00899 // Returns false in case of error. 00900 // If swap is true, assumes a big/little-endian swap is needed. 00901 template <typename T> 00902 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) { 00903 inT32 reserved; 00904 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; 00905 if (swap) Reverse32(&reserved); 00906 reserve(reserved); 00907 size_used_ = reserved; 00908 if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false; 00909 if (swap) { 00910 for (int i = 0; i < size_used_; ++i) 00911 ReverseN(&data_[i], sizeof(data_[i])); 00912 } 00913 return true; 00914 } 00915 template <typename T> 00916 bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) { 00917 inT32 reserved; 00918 if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; 00919 if (swap) Reverse32(&reserved); 00920 reserve(reserved); 00921 size_used_ = reserved; 00922 if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false; 00923 if (swap) { 00924 for (int i = 0; i < size_used_; ++i) 00925 ReverseN(&data_[i], sizeof(data_[i])); 00926 } 00927 return true; 00928 } 00929 00930 // Writes a vector of classes to the given file. Assumes the existence of 00931 // bool T::Serialize(FILE* fp) const that returns false in case of error. 00932 // Returns false in case of error. 00933 template <typename T> 00934 bool GenericVector<T>::SerializeClasses(FILE* fp) const { 00935 if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; 00936 for (int i = 0; i < size_used_; ++i) { 00937 if (!data_[i].Serialize(fp)) return false; 00938 } 00939 return true; 00940 } 00941 template <typename T> 00942 bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const { 00943 if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false; 00944 for (int i = 0; i < size_used_; ++i) { 00945 if (!data_[i].Serialize(fp)) return false; 00946 } 00947 return true; 00948 } 00949 00950 // Reads a vector of classes from the given file. Assumes the existence of 00951 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of 00952 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in 00953 // this function. Returns false in case of error. 00954 // If swap is true, assumes a big/little-endian swap is needed. 00955 template <typename T> 00956 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) { 00957 uinT32 reserved; 00958 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; 00959 if (swap) Reverse32(&reserved); 00960 T empty; 00961 init_to_size(reserved, empty); 00962 for (int i = 0; i < reserved; ++i) { 00963 if (!data_[i].DeSerialize(swap, fp)) return false; 00964 } 00965 return true; 00966 } 00967 template <typename T> 00968 bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) { 00969 uinT32 reserved; 00970 if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; 00971 if (swap) Reverse32(&reserved); 00972 T empty; 00973 init_to_size(reserved, empty); 00974 for (int i = 0; i < reserved; ++i) { 00975 if (!data_[i].DeSerialize(swap, fp)) return false; 00976 } 00977 return true; 00978 } 00979 00980 // This method clear the current object, then, does a shallow copy of 00981 // its argument, and finally invalidates its argument. 00982 template <typename T> 00983 void GenericVector<T>::move(GenericVector<T>* from) { 00984 this->clear(); 00985 this->data_ = from->data_; 00986 this->size_reserved_ = from->size_reserved_; 00987 this->size_used_ = from->size_used_; 00988 this->compare_cb_ = from->compare_cb_; 00989 this->clear_cb_ = from->clear_cb_; 00990 from->data_ = NULL; 00991 from->clear_cb_ = NULL; 00992 from->compare_cb_ = NULL; 00993 from->size_used_ = 0; 00994 from->size_reserved_ = 0; 00995 } 00996 00997 template <typename T> 00998 void GenericVector<T>::sort() { 00999 sort(&tesseract::sort_cmp<T>); 01000 } 01001 01002 // Internal recursive version of choose_nth_item. 01003 // The algorithm used comes from "Algorithms" by Sedgewick: 01004 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC 01005 // The principle is to choose a random pivot, and move everything less than 01006 // the pivot to its left, and everything greater than the pivot to the end 01007 // of the array, then recurse on the part that contains the desired index, or 01008 // just return the answer if it is in the equal section in the middle. 01009 // The random pivot guarantees average linear time for the same reason that 01010 // n times vector::push_back takes linear time on average. 01011 // target_index, start and and end are all indices into the full array. 01012 // Seed is a seed for rand_r for thread safety purposes. Its value is 01013 // unimportant as the random numbers do not affect the result except 01014 // between equal answers. 01015 template <typename T> 01016 int GenericVector<T>::choose_nth_item(int target_index, int start, int end, 01017 unsigned int* seed) { 01018 // Number of elements to process. 01019 int num_elements = end - start; 01020 // Trivial cases. 01021 if (num_elements <= 1) 01022 return start; 01023 if (num_elements == 2) { 01024 if (data_[start] < data_[start + 1]) { 01025 return target_index > start ? start + 1 : start; 01026 } else { 01027 return target_index > start ? start : start + 1; 01028 } 01029 } 01030 // Place the pivot at start. 01031 #ifndef rand_r // _MSC_VER, ANDROID 01032 srand(*seed); 01033 #define rand_r(seed) rand() 01034 #endif // _MSC_VER 01035 int pivot = rand_r(seed) % num_elements + start; 01036 swap(pivot, start); 01037 // The invariant condition here is that items [start, next_lesser) are less 01038 // than the pivot (which is at index next_lesser) and items 01039 // [prev_greater, end) are greater than the pivot, with items 01040 // [next_lesser, prev_greater) being equal to the pivot. 01041 int next_lesser = start; 01042 int prev_greater = end; 01043 for (int next_sample = start + 1; next_sample < prev_greater;) { 01044 if (data_[next_sample] < data_[next_lesser]) { 01045 swap(next_lesser++, next_sample++); 01046 } else if (data_[next_sample] == data_[next_lesser]) { 01047 ++next_sample; 01048 } else { 01049 swap(--prev_greater, next_sample); 01050 } 01051 } 01052 // Now the invariant is set up, we recurse on just the section that contains 01053 // the desired index. 01054 if (target_index < next_lesser) 01055 return choose_nth_item(target_index, start, next_lesser, seed); 01056 else if (target_index < prev_greater) 01057 return next_lesser; // In equal bracket. 01058 else 01059 return choose_nth_item(target_index, prev_greater, end, seed); 01060 } 01061 01062 01063 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_