#include <boxchar.h>
List of all members.
Public Member Functions |
| | BoxChar (const char *utf8_str, int len) |
| | ~BoxChar () |
| const string & | ch () const |
| const Box * | box () const |
| const int & | page () const |
| void | AddBox (int x, int y, int width, int height) |
| void | set_page (int page) |
| string * | mutable_ch () |
| Box * | mutable_box () |
| bool | operator< (const BoxChar &other) const |
Static Public Member Functions |
| static void | TranslateBoxes (int xshift, int yshift, vector< BoxChar * > *boxes) |
| static void | PrepareToWrite (vector< BoxChar * > *boxes) |
| static void | InsertNewlines (bool rtl_rules, bool vertical_rules, vector< BoxChar * > *boxes) |
| static void | InsertSpaces (bool rtl_rules, bool vertical_rules, vector< BoxChar * > *boxes) |
| static void | ReorderRTLText (vector< BoxChar * > *boxes) |
| static bool | ContainsMostlyRTL (const vector< BoxChar * > &boxes) |
| static bool | MostlyVertical (const vector< BoxChar * > &boxes) |
| static int | TotalByteLength (const vector< BoxChar * > &boxes) |
| static void | RotateBoxes (float rotation, int xcenter, int ycenter, int start_box, int end_box, vector< BoxChar * > *boxes) |
| static void | WriteTesseractBoxFile (const string &name, int height, const vector< BoxChar * > &boxes) |
Detailed Description
Definition at line 40 of file boxchar.h.
Constructor & Destructor Documentation
| tesseract::BoxChar::BoxChar |
( |
const char * |
utf8_str, |
|
|
int |
len |
|
) |
| |
Definition at line 41 of file boxchar.cpp.
: ch_(utf8_str, len) {
  // Builds a BoxChar holding the first `len` bytes of `utf8_str` as its text.
  // No bounding box is attached until AddBox() is called.
  box_ = NULL;
  // Give the page number a defined value: page_ is read by other members
  // (e.g. MostlyVertical, WriteTesseractBoxFile) even if set_page() was
  // never called on this object.
  page_ = 0;
}
| tesseract::BoxChar::~BoxChar |
( |
| ) |
|
Member Function Documentation
| void tesseract::BoxChar::AddBox |
( |
int |
x, |
|
|
int |
y, |
|
|
int |
width, |
|
|
int |
height |
|
) |
| |
Definition at line 47 of file boxchar.cpp.
{
  // Attaches a freshly created box with the given origin and size.
  // Release any box already held first so that calling AddBox() more than
  // once does not leak the previous one; box_ starts out NULL (set by the
  // constructor), in which case there is nothing to free.
  boxDestroy(&box_);
  box_ = boxCreate(x, y, width, height);
}
| const Box* tesseract::BoxChar::box |
( |
| ) |
const [inline] |
| const string& tesseract::BoxChar::ch |
( |
| ) |
const [inline] |
| bool tesseract::BoxChar::ContainsMostlyRTL |
( |
const vector< BoxChar * > & |
boxes | ) |
[static] |
Definition at line 215 of file boxchar.cpp.
{
  // Returns true when more code points in `boxes` have a right-to-left
  // direction class (RTL, RTL-Arabic or Arabic-number) than any other class.
  // Entries whose text fails UTF-8 decoding are reported and not counted.
  int num_rtl = 0, num_ltr = 0;
  for (int i = 0; i < boxes.size(); ++i) {
    GenericVector<char32> uni_vector;
    if (!UNICHAR::UTF8ToUnicode(boxes[i]->ch_.c_str(), &uni_vector)) {
      tprintf("Illegal utf8 in boxchar %d string:%s = ", i,
              boxes[i]->ch_.c_str());
      for (int c = 0; c < boxes[i]->ch_.size(); ++c) {
        // Dump each byte as an unsigned value: a plain char >= 0x80 would be
        // sign-extended where char is signed and print as 0xffffffXX.
        tprintf(" 0x%x", static_cast<unsigned char>(boxes[i]->ch_[c]));
      }
      tprintf("\n");
      continue;
    }
    for (int j = 0; j < uni_vector.size(); ++j) {
      UCharDirection dir = u_charDirection(uni_vector[j]);
      if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
          dir == U_ARABIC_NUMBER) {
        ++num_rtl;
      } else {
        // Everything that is not one of the three RTL classes above is
        // tallied on the LTR side.
        ++num_ltr;
      }
    }
  }
  return num_rtl > num_ltr;
}
| void tesseract::BoxChar::InsertNewlines |
( |
bool |
rtl_rules, |
|
|
bool |
vertical_rules, |
|
|
vector< BoxChar * > * |
boxes |
|
) |
| [static] |
Definition at line 81 of file boxchar.cpp.
{
// Walks `boxes` in order, pruning redundant box-less entries and marking
// backward jumps in position with a "\t" entry (presumably the line-break
// marker used by the other routines here -- confirm against callers).
// prev_i is the index of the most recent entry that has a box (-1 if none);
// max_shift is the largest forward step seen since the last "\t" was placed.
int prev_i = -1;
int max_shift = 0;
for (int i = 0; i < boxes->size(); ++i) {
Box* box = (*boxes)[i]->box_;
if (box == NULL) {
// A box-less entry is dropped when it is leading (no boxed entry seen yet),
// when the entry just before it is also box-less, or when it is the last
// element of the vector.
if (prev_i < 0 || prev_i < i - 1 || i + 1 == boxes->size()) {
do {
delete (*boxes)[i];
boxes->erase(boxes->begin() + i);
--i;
// Keep stripping while the new last element is itself box-less.
} while (i >= 0 && i + 1 == boxes->size() && (*boxes)[i]->box_ == NULL);
}
continue;
}
if (prev_i >= 0) {
Box* prev_box = (*boxes)[prev_i]->box_;
// Signed step from the previous boxed entry along the reading direction:
// x difference by default, y difference for vertical text, negated x
// difference for right-to-left text.
int shift = box->x - prev_box->x;
if (vertical_rules) {
shift = box->y - prev_box->y;
} else if (rtl_rules) {
shift = -shift;
}
// A backward step larger than any forward step seen so far triggers a "\t".
if (-shift > max_shift) {
// The "\t" box copies the previous box's size and is placed just past it:
// to its right by default, below it for vertical text, to its left for RTL.
int width = prev_box->w;
int height = prev_box->h;
int x = prev_box->x + width;
int y = prev_box->y;
if (vertical_rules) {
x = prev_box->x;
y = prev_box->y + height;
} else if (rtl_rules) {
x = prev_box->x - width;
if (x < 0) {
// Clamp to 0 and log when the RTL position would be negative.
tprintf("prev x = %d, width=%d\n", prev_box->x, width);
x = 0;
}
}
if (prev_i == i - 1) {
// No box-less entry sits between the two boxes: insert a new "\t" entry
// before the current one and step i past it.
BoxChar* new_box = new BoxChar("\t", 1);
new_box->AddBox(x, y, width, height);
new_box->page_ = (*boxes)[i]->page_;
boxes->insert(boxes->begin() + i, new_box);
++i;
} else {
// Otherwise the box-less entry at i - 1 is reused as the "\t" entry.
(*boxes)[i - 1]->AddBox(x, y, width, height);
(*boxes)[i - 1]->ch_ = "\t";
}
max_shift = 0;
} else if (shift > max_shift) {
max_shift = shift;
}
}
prev_i = i;
}
}
| void tesseract::BoxChar::InsertSpaces |
( |
bool |
rtl_rules, |
|
|
bool |
vertical_rules, |
|
|
vector< BoxChar * > * |
boxes |
|
) |
| [static] |
Definition at line 144 of file boxchar.cpp.
{
  // Every interior entry that has no box is turned into a " " entry whose
  // box fills the gap between its two neighbours. The first and last
  // entries are never examined, and both neighbours must already have boxes.
  for (int idx = 1; idx + 1 < boxes->size(); ++idx) {
    BoxChar* gap = (*boxes)[idx];
    if (gap->box_ != NULL) continue;

    Box* prev = (*boxes)[idx - 1]->box_;
    Box* next = (*boxes)[idx + 1]->box_;
    ASSERT_HOST(prev != NULL && next != NULL);

    int left, right, top, bottom;
    if (vertical_rules) {
      // Vertical text: the gap lies between the bottom of prev and the top
      // of next, and is as wide as the union of the two.
      left = MIN(prev->x, next->x);
      right = MAX(prev->x + prev->w, next->x + next->w);
      top = prev->y + prev->h;
      bottom = next->y;
    } else {
      // Horizontal text: vertical extent is the union of the two neighbours.
      top = MIN(prev->y, next->y);
      bottom = MAX(prev->y + prev->h, next->y + next->h);
      if (!rtl_rules) {
        left = prev->x + prev->w;
        right = next->x;
      } else {
        right = prev->x;
        left = next->x + next->w;
        // Scan backwards until a " " or "\t" entry, keeping the smallest x.
        for (int j = idx - 2; j >= 0; --j) {
          if ((*boxes)[j]->ch_ == " " || (*boxes)[j]->ch_ == "\t") break;
          prev = (*boxes)[j]->box_;
          ASSERT_HOST(prev != NULL);
          if (prev->x < right) {
            right = prev->x;
          }
        }
        // Scan forwards until a box-less or "\t" entry, keeping the largest
        // right-hand edge.
        for (int j = idx + 2; j < boxes->size(); ++j) {
          if ((*boxes)[j]->box_ == NULL || (*boxes)[j]->ch_ == "\t") break;
          next = (*boxes)[j]->box_;
          const int next_edge = next->x + next->w;
          if (next_edge > left) {
            left = next_edge;
          }
        }
      }
    }

    // Force the box to be at least one pixel in each dimension.
    if (right <= left) right = left + 1;
    if (bottom <= top) bottom = top + 1;
    gap->AddBox(left, top, right - left, bottom - top);
    gap->ch_ = " ";
  }
}
| bool tesseract::BoxChar::MostlyVertical |
( |
const vector< BoxChar * > & |
boxes | ) |
[static] |
Definition at line 244 of file boxchar.cpp.
{
  // Compares the accumulated squared x-steps against the accumulated squared
  // y-steps between consecutive boxed entries on the same page, and returns
  // true when the y total is larger. A step is only counted when one
  // component exceeds the other by a factor of kMinNewlineRatio.
  inT64 total_dx = 0, total_dy = 0;
  for (int i = 1; i < boxes.size(); ++i) {
    if (boxes[i - 1]->box_ != NULL && boxes[i]->box_ != NULL &&
        boxes[i - 1]->page_ == boxes[i]->page_) {
      int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
      int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
      if (abs(dx) > abs(dy) * kMinNewlineRatio ||
          abs(dy) > abs(dx) * kMinNewlineRatio) {
        // Widen before multiplying: squaring in int overflows (undefined
        // behaviour) once a step exceeds about 46340 pixels.
        total_dx += static_cast<inT64>(dx) * dx;
        total_dy += static_cast<inT64>(dy) * dy;
      }
    }
  }
  return total_dy > total_dx;
}
| Box* tesseract::BoxChar::mutable_box |
( |
| ) |
[inline] |
| string* tesseract::BoxChar::mutable_ch |
( |
| ) |
[inline] |
| bool tesseract::BoxChar::operator< |
( |
const BoxChar & |
other | ) |
const [inline] |
Definition at line 62 of file boxchar.h.
{
  // Orders by the x coordinate of the box; an entry without a box sorts
  // before any entry that has one. Two box-less entries compare equivalent
  // (neither is less than the other), which keeps the relation irreflexive
  // as sort() requires of its comparator.
  if (box_ == NULL) return other.box_ != NULL;
  if (other.box_ == NULL) return false;
  return box_->x < other.box_->x;
}
| const int& tesseract::BoxChar::page |
( |
| ) |
const [inline] |
| void tesseract::BoxChar::PrepareToWrite |
( |
vector< BoxChar * > * |
boxes | ) |
[static] |
| void tesseract::BoxChar::ReorderRTLText |
( |
vector< BoxChar * > * |
boxes | ) |
[static] |
Definition at line 201 of file boxchar.cpp.
{
  // Sorts each run of entries delimited by "\t" entries independently,
  // using BoxCharPtrSort. The "\t" entry that ends a run is left in place
  // and the next run begins immediately after it.
  BoxCharPtrSort sorter;
  int run_begin = 0;
  while (run_begin < boxes->size()) {
    // The first entry of a run is always included; extend the run until the
    // next "\t" entry or the end of the vector.
    int run_end = run_begin + 1;
    for (; run_end < boxes->size(); ++run_end) {
      if ((*boxes)[run_end]->ch_ == "\t") break;
    }
    sort(boxes->begin() + run_begin, boxes->begin() + run_end, sorter);
    run_begin = run_end + 1;
  }
}
| void tesseract::BoxChar::RotateBoxes |
( |
float |
rotation, |
|
|
int |
xcenter, |
|
|
int |
ycenter, |
|
|
int |
start_box, |
|
|
int |
end_box, |
|
|
vector< BoxChar * > * |
boxes |
|
) |
| [static] |
Definition at line 272 of file boxchar.cpp.
{
  // Rotates the boxes of entries [start_box, end_box) by `rotation` about
  // (xcenter, ycenter). Entries without a box are skipped and stay box-less.

  // Gather the existing boxes into a temporary Boxa.
  Boxa* source_boxes = boxaCreate(0);
  for (int idx = start_box; idx < end_box; ++idx) {
    BOX* current = (*boxes)[idx]->box_;
    if (current) boxaAddBox(source_boxes, current, L_CLONE);
  }

  Boxa* rotated = boxaRotate(source_boxes, xcenter, ycenter, rotation);
  boxaDestroy(&source_boxes);

  // Replace each original box with its rotated counterpart; box-less entries
  // were not added above, so the rotated index only advances for boxed ones.
  int rotated_idx = 0;
  for (int idx = start_box; idx < end_box; ++idx) {
    BoxChar* entry = (*boxes)[idx];
    if (!entry->box_) continue;
    boxDestroy(&(entry->box_));
    entry->box_ = boxaGetBox(rotated, rotated_idx, L_CLONE);
    ++rotated_idx;
  }
  boxaDestroy(&rotated);
}
| void tesseract::BoxChar::set_page |
( |
int |
page | ) |
[inline] |
| int tesseract::BoxChar::TotalByteLength |
( |
const vector< BoxChar * > & |
boxes | ) |
[static] |
Definition at line 263 of file boxchar.cpp.
{
  // Sums the byte length of the text of every entry in `boxes`.
  int byte_count = 0;
  for (vector<BoxChar*>::const_iterator it = boxes.begin(); it != boxes.end();
       ++it) {
    byte_count += (*it)->ch_.size();
  }
  return byte_count;
}
| void tesseract::BoxChar::TranslateBoxes |
( |
int |
xshift, |
|
|
int |
yshift, |
|
|
vector< BoxChar * > * |
boxes |
|
) |
| [static] |
Definition at line 52 of file boxchar.cpp.
{
  // Moves the origin of every existing box by (xshift, yshift).
  // Entries that have no box are left untouched.
  for (vector<BoxChar*>::iterator it = boxes->begin(); it != boxes->end();
       ++it) {
    BOX* target = (*it)->box_;
    if (target == NULL) continue;
    target->x += xshift;
    target->y += yshift;
  }
}
| void tesseract::BoxChar::WriteTesseractBoxFile |
( |
const string & |
name, |
|
|
int |
height, |
|
|
const vector< BoxChar * > & |
boxes |
|
) |
| [static] |
Definition at line 292 of file boxchar.cpp.
{
  // Formats one line per entry as "<text> <x0> <y0> <x1> <y1> <page>\n" and
  // writes the concatenation to the file named by `name`. The y values are
  // subtracted from `height`, i.e. flipped relative to the stored box.
  // Aborts with an error message, writing nothing, if any entry lacks a box.
  string output;
  char buffer[kMaxLineLength];
  for (int i = 0; i < boxes.size(); ++i) {
    const Box* box = boxes[i]->box_;
    if (box == NULL) {
      tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
      return;
    }
    const char* text = boxes[i]->ch_.c_str();
    const int x0 = box->x;
    const int y0 = height - box->y - box->h;
    const int x1 = box->x + box->w;
    const int y1 = height - box->y;
    const int page_num = boxes[i]->page_;
    const int nbytes = snprintf(buffer, kMaxLineLength, "%s %d %d %d %d %d\n",
                                text, x0, y0, x1, y1, page_num);
    if (nbytes < 0) {
      // snprintf reports an encoding error with a negative value; appending
      // with that length would read far outside `buffer`.
      tprintf("Error: failed to format box file line for box %d\n", i);
      return;
    }
    if (nbytes < kMaxLineLength) {
      output.append(buffer, nbytes);
    } else {
      // The line did not fit: nbytes is the length it needs, not the number
      // of bytes actually stored. Re-format into a buffer of the right size
      // rather than appending past the end of `buffer`.
      string line(nbytes + 1, '\0');
      snprintf(&line[0], line.size(), "%s %d %d %d %d %d\n", text, x0, y0, x1,
               y1, page_num);
      output.append(line, 0, nbytes);
    }
  }
  // The destination parameter is documented as `name`; use that identifier.
  File::WriteStringToFileOrDie(output, name);
}
The documentation for this class was generated from the following files: