|
tesseract 3.04.01
|
00001 /********************************************************************** 00002 * File: statistc.c (Formerly stats.c) 00003 * Description: Simple statistical package for integer values. 00004 * Author: Ray Smith 00005 * Created: Mon Feb 04 16:56:05 GMT 1991 00006 * 00007 * (C) Copyright 1991, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // Include automatically generated configuration file if running autoconf. 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #include "statistc.h" 00026 #include <string.h> 00027 #include <math.h> 00028 #include <stdlib.h> 00029 #include "helpers.h" 00030 #include "scrollview.h" 00031 #include "tprintf.h" 00032 00033 using tesseract::KDPairInc; 00034 00035 /********************************************************************** 00036 * STATS::STATS 00037 * 00038 * Construct a new stats element by allocating and zeroing the memory. 00039 **********************************************************************/ 00040 STATS::STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) { 00041 if (max_bucket_value_plus_1 <= min_bucket_value) { 00042 min_bucket_value = 0; 00043 max_bucket_value_plus_1 = 1; 00044 } 00045 rangemin_ = min_bucket_value; // setup 00046 rangemax_ = max_bucket_value_plus_1; 00047 buckets_ = new inT32[rangemax_ - rangemin_]; 00048 clear(); 00049 } 00050 00051 STATS::STATS() { 00052 rangemax_ = 0; 00053 rangemin_ = 0; 00054 buckets_ = NULL; 00055 } 00056 00057 /********************************************************************** 00058 * STATS::set_range 00059 * 00060 * Alter the range on an existing stats element. 00061 **********************************************************************/ 00062 bool STATS::set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1) { 00063 if (max_bucket_value_plus_1 <= min_bucket_value) { 00064 return false; 00065 } 00066 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) { 00067 delete [] buckets_; 00068 buckets_ = new inT32[max_bucket_value_plus_1 - min_bucket_value]; 00069 } 00070 rangemin_ = min_bucket_value; // setup 00071 rangemax_ = max_bucket_value_plus_1; 00072 clear(); // zero it 00073 return true; 00074 } 00075 00076 /********************************************************************** 00077 * STATS::clear 00078 * 00079 * Clear out the STATS class by zeroing all the buckets. 00080 **********************************************************************/ 00081 void STATS::clear() { // clear out buckets 00082 total_count_ = 0; 00083 if (buckets_ != NULL) 00084 memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0])); 00085 } 00086 00087 /********************************************************************** 00088 * STATS::~STATS 00089 * 00090 * Destructor for a stats class. 00091 **********************************************************************/ 00092 STATS::~STATS () { 00093 if (buckets_ != NULL) { 00094 delete [] buckets_; 00095 buckets_ = NULL; 00096 } 00097 } 00098 00099 /********************************************************************** 00100 * STATS::add 00101 * 00102 * Add a set of samples to (or delete from) a pile. 00103 **********************************************************************/ 00104 void STATS::add(inT32 value, inT32 count) { 00105 if (buckets_ == NULL) { 00106 return; 00107 } 00108 value = ClipToRange(value, rangemin_, rangemax_ - 1); 00109 buckets_[value - rangemin_] += count; 00110 total_count_ += count; // keep count of total 00111 } 00112 00113 /********************************************************************** 00114 * STATS::mode 00115 * 00116 * Find the mode of a stats class. 00117 **********************************************************************/ 00118 inT32 STATS::mode() const { // get mode of samples 00119 if (buckets_ == NULL) { 00120 return rangemin_; 00121 } 00122 inT32 max = buckets_[0]; // max cell count 00123 inT32 maxindex = 0; // index of max 00124 for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) { 00125 if (buckets_[index] > max) { 00126 max = buckets_[index]; // find biggest 00127 maxindex = index; 00128 } 00129 } 00130 return maxindex + rangemin_; // index of biggest 00131 } 00132 00133 /********************************************************************** 00134 * STATS::mean 00135 * 00136 * Find the mean of a stats class. 00137 **********************************************************************/ 00138 double STATS::mean() const { //get mean of samples 00139 if (buckets_ == NULL || total_count_ <= 0) { 00140 return static_cast<double>(rangemin_); 00141 } 00142 inT64 sum = 0; 00143 for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { 00144 sum += static_cast<inT64>(index) * buckets_[index]; 00145 } 00146 return static_cast<double>(sum) / total_count_ + rangemin_; 00147 } 00148 00149 /********************************************************************** 00150 * STATS::sd 00151 * 00152 * Find the standard deviation of a stats class. 00153 **********************************************************************/ 00154 double STATS::sd() const { //standard deviation 00155 if (buckets_ == NULL || total_count_ <= 0) { 00156 return 0.0; 00157 } 00158 inT64 sum = 0; 00159 double sqsum = 0.0; 00160 for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) { 00161 sum += static_cast<inT64>(index) * buckets_[index]; 00162 sqsum += static_cast<double>(index) * index * buckets_[index]; 00163 } 00164 double variance = static_cast<double>(sum) / total_count_; 00165 variance = sqsum / total_count_ - variance * variance; 00166 if (variance > 0.0) 00167 return sqrt(variance); 00168 return 0.0; 00169 } 00170 00171 /********************************************************************** 00172 * STATS::ile 00173 * 00174 * Returns the fractile value such that frac fraction (in [0,1]) of samples 00175 * has a value less than the return value. 00176 **********************************************************************/ 00177 double STATS::ile(double frac) const { 00178 if (buckets_ == NULL || total_count_ == 0) { 00179 return static_cast<double>(rangemin_); 00180 } 00181 #if 0 00182 // TODO(rays) The existing code doesn't seem to be doing the right thing 00183 // with target a double but this substitute crashes the code that uses it. 00184 // Investigate and fix properly. 00185 int target = IntCastRounded(frac * total_count_); 00186 target = ClipToRange(target, 1, total_count_); 00187 #else 00188 double target = frac * total_count_; 00189 target = ClipToRange(target, 1.0, static_cast<double>(total_count_)); 00190 #endif 00191 int sum = 0; 00192 int index = 0; 00193 for (index = 0; index < rangemax_ - rangemin_ && sum < target; 00194 sum += buckets_[index++]); 00195 if (index > 0) { 00196 ASSERT_HOST(buckets_[index - 1] > 0); 00197 return rangemin_ + index - 00198 static_cast<double>(sum - target) / buckets_[index - 1]; 00199 } else { 00200 return static_cast<double>(rangemin_); 00201 } 00202 } 00203 00204 /********************************************************************** 00205 * STATS::min_bucket 00206 * 00207 * Find REAL minimum bucket - ile(0.0) isn't necessarily correct 00208 **********************************************************************/ 00209 inT32 STATS::min_bucket() const { // Find min 00210 if (buckets_ == NULL || total_count_ == 0) { 00211 return rangemin_; 00212 } 00213 inT32 min = 0; 00214 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++); 00215 return rangemin_ + min; 00216 } 00217 00218 00219 /********************************************************************** 00220 * STATS::max_bucket 00221 * 00222 * Find REAL maximum bucket - ile(1.0) isn't necessarily correct 00223 **********************************************************************/ 00224 00225 inT32 STATS::max_bucket() const { // Find max 00226 if (buckets_ == NULL || total_count_ == 0) { 00227 return rangemin_; 00228 } 00229 inT32 max; 00230 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--); 00231 return rangemin_ + max; 00232 } 00233 00234 /********************************************************************** 00235 * STATS::median 00236 * 00237 * Finds a more useful estimate of median than ile(0.5). 00238 * 00239 * Overcomes a problem with ile() - if the samples are, for example, 00240 * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway 00241 * between 6 and 13 = 9.5 00242 **********************************************************************/ 00243 double STATS::median() const { //get median 00244 if (buckets_ == NULL) { 00245 return static_cast<double>(rangemin_); 00246 } 00247 double median = ile(0.5); 00248 int median_pile = static_cast<int>(floor(median)); 00249 if ((total_count_ > 1) && (pile_count(median_pile) == 0)) { 00250 inT32 min_pile; 00251 inT32 max_pile; 00252 /* Find preceding non zero pile */ 00253 for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--); 00254 /* Find following non zero pile */ 00255 for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++); 00256 median = (min_pile + max_pile) / 2.0; 00257 } 00258 return median; 00259 } 00260 00261 /********************************************************************** 00262 * STATS::local_min 00263 * 00264 * Return TRUE if this point is a local min. 00265 **********************************************************************/ 00266 bool STATS::local_min(inT32 x) const { 00267 if (buckets_ == NULL) { 00268 return false; 00269 } 00270 x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_; 00271 if (buckets_[x] == 0) 00272 return true; 00273 inT32 index; // table index 00274 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index); 00275 if (index >= 0 && buckets_[index] < buckets_[x]) 00276 return false; 00277 for (index = x + 1; index < rangemax_ - rangemin_ && 00278 buckets_[index] == buckets_[x]; ++index); 00279 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) 00280 return false; 00281 else 00282 return true; 00283 } 00284 00285 /********************************************************************** 00286 * STATS::smooth 00287 * 00288 * Apply a triangular smoothing filter to the stats. 00289 * This makes the modes a bit more useful. 00290 * The factor gives the height of the triangle, i.e. the weight of the 00291 * centre. 00292 **********************************************************************/ 00293 void STATS::smooth(inT32 factor) { 00294 if (buckets_ == NULL || factor < 2) { 00295 return; 00296 } 00297 STATS result(rangemin_, rangemax_); 00298 int entrycount = rangemax_ - rangemin_; 00299 for (int entry = 0; entry < entrycount; entry++) { 00300 //centre weight 00301 int count = buckets_[entry] * factor; 00302 for (int offset = 1; offset < factor; offset++) { 00303 if (entry - offset >= 0) 00304 count += buckets_[entry - offset] * (factor - offset); 00305 if (entry + offset < entrycount) 00306 count += buckets_[entry + offset] * (factor - offset); 00307 } 00308 result.add(entry + rangemin_, count); 00309 } 00310 total_count_ = result.total_count_; 00311 memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0])); 00312 } 00313 00314 /********************************************************************** 00315 * STATS::cluster 00316 * 00317 * Cluster the samples into max_cluster clusters. 00318 * Each call runs one iteration. The array of clusters must be 00319 * max_clusters+1 in size as cluster 0 is used to indicate which samples 00320 * have been used. 00321 * The return value is the current number of clusters. 00322 **********************************************************************/ 00323 00324 inT32 STATS::cluster(float lower, // thresholds 00325 float upper, 00326 float multiple, // distance threshold 00327 inT32 max_clusters, // max no to make 00328 STATS *clusters) { // array of clusters 00329 BOOL8 new_cluster; // added one 00330 float *centres; // cluster centres 00331 inT32 entry; // bucket index 00332 inT32 cluster; // cluster index 00333 inT32 best_cluster; // one to assign to 00334 inT32 new_centre = 0; // residual mode 00335 inT32 new_mode; // pile count of new_centre 00336 inT32 count; // pile to place 00337 float dist; // from cluster 00338 float min_dist; // from best_cluster 00339 inT32 cluster_count; // no of clusters 00340 00341 if (buckets_ == NULL || max_clusters < 1) 00342 return 0; 00343 centres = new float[max_clusters + 1]; 00344 for (cluster_count = 1; cluster_count <= max_clusters 00345 && clusters[cluster_count].buckets_ != NULL 00346 && clusters[cluster_count].total_count_ > 0; 00347 cluster_count++) { 00348 centres[cluster_count] = 00349 static_cast<float>(clusters[cluster_count].ile(0.5)); 00350 new_centre = clusters[cluster_count].mode(); 00351 for (entry = new_centre - 1; centres[cluster_count] - entry < lower 00352 && entry >= rangemin_ 00353 && pile_count(entry) <= pile_count(entry + 1); 00354 entry--) { 00355 count = pile_count(entry) - clusters[0].pile_count(entry); 00356 if (count > 0) { 00357 clusters[cluster_count].add(entry, count); 00358 clusters[0].add (entry, count); 00359 } 00360 } 00361 for (entry = new_centre + 1; entry - centres[cluster_count] < lower 00362 && entry < rangemax_ 00363 && pile_count(entry) <= pile_count(entry - 1); 00364 entry++) { 00365 count = pile_count(entry) - clusters[0].pile_count(entry); 00366 if (count > 0) { 00367 clusters[cluster_count].add(entry, count); 00368 clusters[0].add(entry, count); 00369 } 00370 } 00371 } 00372 cluster_count--; 00373 00374 if (cluster_count == 0) { 00375 clusters[0].set_range(rangemin_, rangemax_); 00376 } 00377 do { 00378 new_cluster = FALSE; 00379 new_mode = 0; 00380 for (entry = 0; entry < rangemax_ - rangemin_; entry++) { 00381 count = buckets_[entry] - clusters[0].buckets_[entry]; 00382 //remaining pile 00383 if (count > 0) { //any to handle 00384 min_dist = static_cast<float>(MAX_INT32); 00385 best_cluster = 0; 00386 for (cluster = 1; cluster <= cluster_count; cluster++) { 00387 dist = entry + rangemin_ - centres[cluster]; 00388 //find distance 00389 if (dist < 0) 00390 dist = -dist; 00391 if (dist < min_dist) { 00392 min_dist = dist; //find least 00393 best_cluster = cluster; 00394 } 00395 } 00396 if (min_dist > upper //far enough for new 00397 && (best_cluster == 0 00398 || entry + rangemin_ > centres[best_cluster] * multiple 00399 || entry + rangemin_ < centres[best_cluster] / multiple)) { 00400 if (count > new_mode) { 00401 new_mode = count; 00402 new_centre = entry + rangemin_; 00403 } 00404 } 00405 } 00406 } 00407 // need new and room 00408 if (new_mode > 0 && cluster_count < max_clusters) { 00409 cluster_count++; 00410 new_cluster = TRUE; 00411 if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) { 00412 delete [] centres; 00413 return 0; 00414 } 00415 centres[cluster_count] = static_cast<float>(new_centre); 00416 clusters[cluster_count].add(new_centre, new_mode); 00417 clusters[0].add(new_centre, new_mode); 00418 for (entry = new_centre - 1; centres[cluster_count] - entry < lower 00419 && entry >= rangemin_ 00420 && pile_count (entry) <= pile_count(entry + 1); entry--) { 00421 count = pile_count(entry) - clusters[0].pile_count(entry); 00422 if (count > 0) { 00423 clusters[cluster_count].add(entry, count); 00424 clusters[0].add(entry, count); 00425 } 00426 } 00427 for (entry = new_centre + 1; entry - centres[cluster_count] < lower 00428 && entry < rangemax_ 00429 && pile_count (entry) <= pile_count(entry - 1); entry++) { 00430 count = pile_count(entry) - clusters[0].pile_count(entry); 00431 if (count > 0) { 00432 clusters[cluster_count].add(entry, count); 00433 clusters[0].add (entry, count); 00434 } 00435 } 00436 centres[cluster_count] = 00437 static_cast<float>(clusters[cluster_count].ile(0.5)); 00438 } 00439 } while (new_cluster && cluster_count < max_clusters); 00440 delete [] centres; 00441 return cluster_count; 00442 } 00443 00444 // Helper tests that the current index is still part of the peak and gathers 00445 // the data into the peak, returning false when the peak is ended. 00446 // src_buckets[index] - used_buckets[index] is the unused part of the histogram. 00447 // prev_count is the histogram count of the previous index on entry and is 00448 // updated to the current index on return. 00449 // total_count and total_value are accumulating the mean of the peak. 00450 static bool GatherPeak(int index, const int* src_buckets, int* used_buckets, 00451 int* prev_count, int* total_count, double* total_value) { 00452 int pile_count = src_buckets[index] - used_buckets[index]; 00453 if (pile_count <= *prev_count && pile_count > 0) { 00454 // Accumulate count and index.count product. 00455 *total_count += pile_count; 00456 *total_value += index * pile_count; 00457 // Mark this index as used 00458 used_buckets[index] = src_buckets[index]; 00459 *prev_count = pile_count; 00460 return true; 00461 } else { 00462 return false; 00463 } 00464 } 00465 00466 // Finds (at most) the top max_modes modes, well actually the whole peak around 00467 // each mode, returning them in the given modes vector as a <mean of peak, 00468 // total count of peak> pair in order of decreasing total count. 00469 // Since the mean is the key and the count the data in the pair, a single call 00470 // to sort on the output will re-sort by increasing mean of peak if that is 00471 // more useful than decreasing total count. 00472 // Returns the actual number of modes found. 00473 int STATS::top_n_modes(int max_modes, 00474 GenericVector<KDPairInc<float, int> >* modes) const { 00475 if (max_modes <= 0) return 0; 00476 int src_count = rangemax_ - rangemin_; 00477 // Used copies the counts in buckets_ as they get used. 00478 STATS used(rangemin_, rangemax_); 00479 modes->truncate(0); 00480 // Total count of the smallest peak found so far. 00481 int least_count = 1; 00482 // Mode that is used as a seed for each peak 00483 int max_count = 0; 00484 do { 00485 // Find an unused mode. 00486 max_count = 0; 00487 int max_index = 0; 00488 for (int src_index = 0; src_index < src_count; src_index++) { 00489 int pile_count = buckets_[src_index] - used.buckets_[src_index]; 00490 if (pile_count > max_count) { 00491 max_count = pile_count; 00492 max_index = src_index; 00493 } 00494 } 00495 if (max_count > 0) { 00496 // Copy the bucket count to used so it doesn't get found again. 00497 used.buckets_[max_index] = max_count; 00498 // Get the entire peak. 00499 double total_value = max_index * max_count; 00500 int total_count = max_count; 00501 int prev_pile = max_count; 00502 for (int offset = 1; max_index + offset < src_count; ++offset) { 00503 if (!GatherPeak(max_index + offset, buckets_, used.buckets_, 00504 &prev_pile, &total_count, &total_value)) 00505 break; 00506 } 00507 prev_pile = buckets_[max_index]; 00508 for (int offset = 1; max_index - offset >= 0; ++offset) { 00509 if (!GatherPeak(max_index - offset, buckets_, used.buckets_, 00510 &prev_pile, &total_count, &total_value)) 00511 break; 00512 } 00513 if (total_count > least_count || modes->size() < max_modes) { 00514 // We definitely want this mode, so if we have enough discard the least. 00515 if (modes->size() == max_modes) 00516 modes->truncate(max_modes - 1); 00517 int target_index = 0; 00518 // Linear search for the target insertion point. 00519 while (target_index < modes->size() && 00520 (*modes)[target_index].data >= total_count) 00521 ++target_index; 00522 float peak_mean = 00523 static_cast<float>(total_value / total_count + rangemin_); 00524 modes->insert(KDPairInc<float, int>(peak_mean, total_count), 00525 target_index); 00526 least_count = modes->back().data; 00527 } 00528 } 00529 } while (max_count > 0); 00530 return modes->size(); 00531 } 00532 00533 /********************************************************************** 00534 * STATS::print 00535 * 00536 * Prints a summary and table of the histogram. 00537 **********************************************************************/ 00538 void STATS::print() const { 00539 if (buckets_ == NULL) { 00540 return; 00541 } 00542 inT32 min = min_bucket() - rangemin_; 00543 inT32 max = max_bucket() - rangemin_; 00544 00545 int num_printed = 0; 00546 for (int index = min; index <= max; index++) { 00547 if (buckets_[index] != 0) { 00548 tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]); 00549 if (++num_printed % 8 == 0) 00550 tprintf ("\n"); 00551 } 00552 } 00553 tprintf ("\n"); 00554 print_summary(); 00555 } 00556 00557 00558 00559 /********************************************************************** 00560 * STATS::print_summary 00561 * 00562 * Print a summary of the stats. 00563 **********************************************************************/ 00564 void STATS::print_summary() const { 00565 if (buckets_ == NULL) { 00566 return; 00567 } 00568 inT32 min = min_bucket(); 00569 inT32 max = max_bucket(); 00570 tprintf("Total count=%d\n", total_count_); 00571 tprintf("Min=%.2f Really=%d\n", ile(0.0), min); 00572 tprintf("Lower quartile=%.2f\n", ile(0.25)); 00573 tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5)); 00574 tprintf("Upper quartile=%.2f\n", ile(0.75)); 00575 tprintf("Max=%.2f Really=%d\n", ile(1.0), max); 00576 tprintf("Range=%d\n", max + 1 - min); 00577 tprintf("Mean= %.2f\n", mean()); 00578 tprintf("SD= %.2f\n", sd()); 00579 } 00580 00581 00582 /********************************************************************** 00583 * STATS::plot 00584 * 00585 * Draw a histogram of the stats table. 00586 **********************************************************************/ 00587 00588 #ifndef GRAPHICS_DISABLED 00589 void STATS::plot(ScrollView* window, // to draw in 00590 float xorigin, // bottom left 00591 float yorigin, 00592 float xscale, // one x unit 00593 float yscale, // one y unit 00594 ScrollView::Color colour) const { // colour to draw in 00595 if (buckets_ == NULL) { 00596 return; 00597 } 00598 window->Pen(colour); 00599 00600 for (int index = 0; index < rangemax_ - rangemin_; index++) { 00601 window->Rectangle( xorigin + xscale * index, yorigin, 00602 xorigin + xscale * (index + 1), 00603 yorigin + yscale * buckets_[index]); 00604 } 00605 } 00606 #endif 00607 00608 00609 /********************************************************************** 00610 * STATS::plotline 00611 * 00612 * Draw a histogram of the stats table. (Line only) 00613 **********************************************************************/ 00614 00615 #ifndef GRAPHICS_DISABLED 00616 void STATS::plotline(ScrollView* window, // to draw in 00617 float xorigin, // bottom left 00618 float yorigin, 00619 float xscale, // one x unit 00620 float yscale, // one y unit 00621 ScrollView::Color colour) const { // colour to draw in 00622 if (buckets_ == NULL) { 00623 return; 00624 } 00625 window->Pen(colour); 00626 window->SetCursor(xorigin, yorigin + yscale * buckets_[0]); 00627 for (int index = 0; index < rangemax_ - rangemin_; index++) { 00628 window->DrawTo(xorigin + xscale * index, 00629 yorigin + yscale * buckets_[index]); 00630 } 00631 } 00632 #endif 00633 00634 00635 /********************************************************************** 00636 * choose_nth_item 00637 * 00638 * Returns the index of what would b the nth item in the array 00639 * if the members were sorted, without actually sorting. 00640 **********************************************************************/ 00641 00642 inT32 choose_nth_item(inT32 index, float *array, inT32 count) { 00643 inT32 next_sample; // next one to do 00644 inT32 next_lesser; // space for new 00645 inT32 prev_greater; // last one saved 00646 inT32 equal_count; // no of equal ones 00647 float pivot; // proposed median 00648 float sample; // current sample 00649 00650 if (count <= 1) 00651 return 0; 00652 if (count == 2) { 00653 if (array[0] < array[1]) { 00654 return index >= 1 ? 1 : 0; 00655 } 00656 else { 00657 return index >= 1 ? 0 : 1; 00658 } 00659 } 00660 else { 00661 if (index < 0) 00662 index = 0; // ensure legal 00663 else if (index >= count) 00664 index = count - 1; 00665 equal_count = (inT32) (rand() % count); 00666 pivot = array[equal_count]; 00667 // fill gap 00668 array[equal_count] = array[0]; 00669 next_lesser = 0; 00670 prev_greater = count; 00671 equal_count = 1; 00672 for (next_sample = 1; next_sample < prev_greater;) { 00673 sample = array[next_sample]; 00674 if (sample < pivot) { 00675 // shuffle 00676 array[next_lesser++] = sample; 00677 next_sample++; 00678 } 00679 else if (sample > pivot) { 00680 prev_greater--; 00681 // juggle 00682 array[next_sample] = array[prev_greater]; 00683 array[prev_greater] = sample; 00684 } 00685 else { 00686 equal_count++; 00687 next_sample++; 00688 } 00689 } 00690 for (next_sample = next_lesser; next_sample < prev_greater;) 00691 array[next_sample++] = pivot; 00692 if (index < next_lesser) 00693 return choose_nth_item (index, array, next_lesser); 00694 else if (index < prev_greater) 00695 return next_lesser; // in equal bracket 00696 else 00697 return choose_nth_item (index - prev_greater, 00698 array + prev_greater, 00699 count - prev_greater) + prev_greater; 00700 } 00701 } 00702 00703 /********************************************************************** 00704 * choose_nth_item 00705 * 00706 * Returns the index of what would be the nth item in the array 00707 * if the members were sorted, without actually sorting. 00708 **********************************************************************/ 00709 inT32 choose_nth_item(inT32 index, void *array, inT32 count, size_t size, 00710 int (*compar)(const void*, const void*)) { 00711 int result; // of compar 00712 inT32 next_sample; // next one to do 00713 inT32 next_lesser; // space for new 00714 inT32 prev_greater; // last one saved 00715 inT32 equal_count; // no of equal ones 00716 inT32 pivot; // proposed median 00717 00718 if (count <= 1) 00719 return 0; 00720 if (count == 2) { 00721 if (compar (array, (char *) array + size) < 0) { 00722 return index >= 1 ? 1 : 0; 00723 } 00724 else { 00725 return index >= 1 ? 0 : 1; 00726 } 00727 } 00728 if (index < 0) 00729 index = 0; // ensure legal 00730 else if (index >= count) 00731 index = count - 1; 00732 pivot = (inT32) (rand () % count); 00733 swap_entries (array, size, pivot, 0); 00734 next_lesser = 0; 00735 prev_greater = count; 00736 equal_count = 1; 00737 for (next_sample = 1; next_sample < prev_greater;) { 00738 result = 00739 compar ((char *) array + size * next_sample, 00740 (char *) array + size * next_lesser); 00741 if (result < 0) { 00742 swap_entries (array, size, next_lesser++, next_sample++); 00743 // shuffle 00744 } 00745 else if (result > 0) { 00746 prev_greater--; 00747 swap_entries(array, size, prev_greater, next_sample); 00748 } 00749 else { 00750 equal_count++; 00751 next_sample++; 00752 } 00753 } 00754 if (index < next_lesser) 00755 return choose_nth_item (index, array, next_lesser, size, compar); 00756 else if (index < prev_greater) 00757 return next_lesser; // in equal bracket 00758 else 00759 return choose_nth_item (index - prev_greater, 00760 (char *) array + size * prev_greater, 00761 count - prev_greater, size, 00762 compar) + prev_greater; 00763 } 00764 00765 /********************************************************************** 00766 * swap_entries 00767 * 00768 * Swap 2 entries of arbitrary size in-place in a table. 00769 **********************************************************************/ 00770 void swap_entries(void *array, // array of entries 00771 size_t size, // size of entry 00772 inT32 index1, // entries to swap 00773 inT32 index2) { 00774 char tmp; 00775 char *ptr1; // to entries 00776 char *ptr2; 00777 size_t count; // of bytes 00778 00779 ptr1 = reinterpret_cast<char*>(array) + index1 * size; 00780 ptr2 = reinterpret_cast<char*>(array) + index2 * size; 00781 for (count = 0; count < size; count++) { 00782 tmp = *ptr1; 00783 *ptr1++ = *ptr2; 00784 *ptr2++ = tmp; // tedious! 00785 } 00786 }