26 #include "config_auto.h" 34 static BOOL_VAR(edges_use_new_outline_complexity,
false,
35 "Use the new outline complexity module");
36 static INT_VAR(edges_max_children_per_outline, 10,
37 "Max number of children inside a character outline");
38 static INT_VAR(edges_max_children_layers, 5,
39 "Max layers of nested children inside a character outline");
41 "turn on debugging for this module");
43 static INT_VAR(edges_children_per_grandchild, 10,
44 "Importance ratio for chucking outlines");
45 static INT_VAR(edges_children_count_limit, 45,
46 "Max holes allowed in blob");
47 static BOOL_VAR(edges_children_fix,
false,
48 "Remove boxy parents of char-like children");
49 static INT_VAR(edges_min_nonhole, 12,
50 "Min pixels for potential char in box");
51 static INT_VAR(edges_patharea_ratio, 40,
52 "Max lensq/area for acceptable child outline");
54 "Min area fraction of child outline");
56 "Min area fraction of grandchild for box");
66 ICOORD tright): bl(bleft), tr(tright) {
70 buckets.reset(
new C_OUTLINE_LIST[bxdim * bydim]);
117 int16_t xindex, yindex;
120 int32_t grandchild_count;
121 C_OUTLINE_IT child_it;
129 grandchild_count = 0;
130 if (++depth > edges_max_children_layers)
131 return max_count + depth;
133 for (yindex = ymin; yindex <= ymax; yindex++) {
134 for (xindex = xmin; xindex <= xmax; xindex++) {
135 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
136 if (child_it.empty())
138 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
139 child_it.forward()) {
140 child = child_it.data();
141 if (child == outline || !(*child < *outline))
145 if (child_count > edges_max_children_per_outline) {
147 tprintf(
"Discard outline on child_count=%d > " 148 "max_children_per_outline=%d\n",
150 static_cast<int32_t>(edges_max_children_per_outline));
151 return max_count + child_count;
155 int32_t remaining_count = max_count - child_count - grandchild_count;
156 if (remaining_count > 0)
157 grandchild_count += edges_children_per_grandchild *
159 if (child_count + grandchild_count > max_count) {
161 tprintf(
"Disgard outline on child_count=%d + grandchild_count=%d " 163 child_count, grandchild_count, max_count);
164 return child_count + grandchild_count;
169 return child_count + grandchild_count;
186 int16_t xindex, yindex;
189 int32_t grandchild_count;
191 float max_parent_area;
193 int32_t child_length;
195 C_OUTLINE_IT child_it;
203 grandchild_count = 0;
207 for (yindex = ymin; yindex <= ymax; yindex++) {
208 for (xindex = xmin; xindex <= xmax; xindex++) {
209 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
210 if (child_it.empty())
212 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
213 child_it.forward()) {
214 child = child_it.data();
215 if (child != outline && *child < *outline) {
217 if (child_count <= max_count) {
218 int max_grand =(max_count - child_count) /
219 edges_children_per_grandchild;
222 edges_children_per_grandchild;
226 if (child_count + grandchild_count > max_count) {
228 tprintf(
"Discarding parent with child count=%d, gc=%d\n",
229 child_count,grandchild_count);
230 return child_count + grandchild_count;
232 if (parent_area == 0) {
235 parent_area = -parent_area;
237 if (parent_area < max_parent_area)
241 (!edges_children_fix ||
245 child_area = -child_area;
246 if (edges_children_fix) {
247 if (parent_area - child_area < max_parent_area) {
251 if (grandchild_count > 0) {
253 tprintf(
"Discarding parent of area %d, child area=%d, max%g " 255 parent_area, child_area, max_parent_area,
257 return max_count + 1;
260 if (child_length * child_length >
261 child_area * edges_patharea_ratio) {
263 tprintf(
"Discarding parent of area %d, child area=%d, max%g " 264 "with child length=%d\n",
265 parent_area, child_area, max_parent_area,
267 return max_count + 1;
270 if (child_area < child->bounding_box().area() * edges_childarea) {
272 tprintf(
"Discarding parent of area %d, child area=%d, max%g " 273 "with child rect=%d\n",
274 parent_area, child_area, max_parent_area,
276 return max_count + 1;
283 return child_count + grandchild_count;
301 int16_t xindex, yindex;
303 C_OUTLINE_IT child_it;
310 for (yindex = ymin; yindex <= ymax; yindex++) {
311 for (xindex = xmin; xindex <= xmax; xindex++) {
312 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
313 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
314 child_it.forward()) {
315 if (*child_it.data() < *outline) {
316 it->add_after_then_move(child_it.extract());
332 C_OUTLINE_LIST outlines;
333 C_OUTLINE_IT out_it = &outlines;
354 C_OUTLINE_LIST *outlines) {
370 C_OUTLINE_LIST *outlines,
374 C_OUTLINE_IT out_it = outlines;
375 C_OUTLINE_IT bucket_it;
378 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
379 outline = out_it.extract();
382 bucket_it.set_to_list((*buckets) (ol_box.
left(), ol_box.
bottom()));
383 bucket_it.add_to_end(outline);
399 C_OUTLINE_LIST outlines;
401 C_OUTLINE_IT out_it = &outlines;
402 C_OUTLINE_IT bucket_it = buckets->
start_scan();
403 C_OUTLINE_IT parent_it;
404 C_BLOB_IT good_blobs = block->
blob_list();
407 while (!bucket_it.empty()) {
408 out_it.set_to_list(&outlines);
410 parent_it = bucket_it;
413 }
while (!bucket_it.at_first() &&
414 !(*parent_it.data() < *bucket_it.data()));
415 }
while (!bucket_it.at_first());
418 out_it.add_after_then_move(parent_it.extract());
423 bucket_it.set_to_list(buckets->
scan_next());
438 C_BLOB_IT* reject_it,
439 C_OUTLINE_IT* blob_it
444 outline = blob_it->data();
445 if (edges_use_new_outline_complexity)
447 edges_children_count_limit,
451 edges_children_count_limit);
452 if (child_count > edges_children_count_limit)
bool capture_children(OL_BUCKETS *buckets, C_BLOB_IT *reject_it, C_OUTLINE_IT *blob_it)
void empty_buckets(BLOCK *block, OL_BUCKETS *buckets)
#define INT_VAR(name, val, comment)
const TBOX & bounding_box() const
#define double_VAR(name, val, comment)
int32_t count_children(C_OUTLINE *outline, int32_t max_count)
void extract_edges(Pix *pix, BLOCK *block)
static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list, C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it)
int16_t y() const
access_function
int32_t outline_complexity(C_OUTLINE *outline, int32_t max_count, int16_t depth)
void block_edges(Pix *t_pix, PDBLK *block, C_OUTLINE_IT *outline_it)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
C_BLOB_LIST * reject_blobs()
int32_t pathlength() const
int32_t outer_area() const
int16_t x() const
access function
DLLSYM void tprintf(const char *format,...)
void fill_buckets(C_OUTLINE_LIST *outlines, OL_BUCKETS *buckets)
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
PDBLK pdblk
Page Description Block.
C_OUTLINE_LIST * start_scan()
#define BOOL_VAR(name, val, comment)
C_OUTLINE_LIST * scan_next()
void extract_children(C_OUTLINE *outline, C_OUTLINE_IT *it)
C_BLOB_LIST * blob_list()
get blobs
C_OUTLINE_LIST * operator()(int16_t x, int16_t y)
OL_BUCKETS(ICOORD bleft, ICOORD tright)