Calculating skew of text OpenCV
Asked Answered
B

2

5

I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.

Currently this is the function I am using:

/// Estimates the skew angle of the content of `img` from the minimum-area
/// rotated rectangle that encloses its foreground pixels.
///
/// `img` is modified in place: it is thresholded, inverted and eroded, and
/// the fitted rectangle is drawn onto it.
///
/// @param img  Image whose pixels are read as single-channel `uchar` values.
/// @return     The rectangle's angle, with values below -45 shifted by +90;
///             0 when no foreground pixel is left after preprocessing.
double compute_skew(Mat &img)
{
    // Binarize: only pixels brighter than 225 become 255, the rest 0.
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);

    // Invert colors so the formerly dark pixels are the non-zero ones.
    cv::bitwise_not(img, img);

    // Erode with a 5x3 rectangular structuring element.
    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, element);

    // Collect the coordinates of every non-zero pixel.
    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = img.begin<uchar>();
    cv::Mat_<uchar>::iterator end = img.end<uchar>();
    for (; it != end; ++it)
        if (*it)
            points.push_back(it.pos());

    // With no foreground pixel there is nothing to fit a rectangle to;
    // report zero skew instead of handing minAreaRect an empty point set.
    if (points.empty())
        return 0.;

    cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));

    // Fold angles below -45 degrees back by a quarter turn.
    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    // Draw the fitted rectangle onto the (already preprocessed) image.
    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);

    return angle;
}

When I look at the angle in the debugger I get 0.000000 for this image: enter image description here

However when I give it this image I get proper results of a skew of about 16 degrees:

enter image description here

How can I properly detect the skew in the first image?

Beefwood answered 4/6, 2014 at 19:37 Comment(0)
P
13

There are a few other ways to get the skew angle: 1) the Hough transform, and 2) the horizontal projection profile. For the latter, rotate the image through a set of candidate angles (angle bins) and calculate the horizontal projection at each one; the angle with the greatest horizontal histogram value is the deskew angle.

I have provided an implementation of 1) below. I believe it to be superior to the bounding-box method you are using, because the box method requires that you completely clean the image of any noise, which just isn't possible most of the time.

You should know that the Hough method doesn't work well if there is too much noise. You can reduce noise in different ways depending on what type of "line" you want to treat as the most dominant in the image. I have provided two methods for this (preprocess1 and preprocess2). Be sure to play with the parameters, thresholds, etc.

results (all run using preprocess2, all run using same parameter set)

code

#include <cmath>
#include <iostream>
#include <vector>

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

/// Finds the dominant straight line in a binary image with a Hough transform
/// and reports its inclination.
///
/// Every pixel of `im` equal to 255 (the one-pixel image border is skipped)
/// votes for all 180 one-degree line orientations through it; the line
/// distance rho is measured from the image centre. The line with the most
/// votes is drawn on `orig` with Scalar(0,0,255), its angle is printed to
/// stdout and stored in `*skew`, and `orig` is shown in a window named "orig".
///
/// @param im    Image whose pixels are read as single-channel `uchar` values.
/// @param orig  Image the detected line is drawn on.
/// @param skew  Output, must not be null: line inclination in degrees
///              (90 when the detected line is vertical).
void hough_transform(Mat& im,Mat& orig,double* skew)
{
    // Largest possible distance of a pixel from the image centre.
    const double max_r=sqrt(pow(.5*im.cols,2)+pow(.5*im.rows,2));
    const int angleBins = 180;

    // cos/sin depend only on the angle bin, so compute them once instead of
    // once per foreground pixel.
    std::vector<double> cosTable(angleBins);
    std::vector<double> sinTable(angleBins);
    for(int t=0;t<angleBins;t++)
    {
        const double binTheta = (double)t/angleBins*CV_PI;
        cosTable[t]=cos(binTheta);
        sinTable[t]=sin(binTheta);
    }

    // Accumulator: one row per angle bin, one column per integer rho
    // (shifted by max_r so the column index is non-negative).
    Mat acc = Mat::zeros(Size(static_cast<int>(2*max_r),angleBins),CV_32SC1);
    const int cenx = im.cols/2;
    const int ceny = im.rows/2;
    for(int y=1;y<im.rows-1;y++)
    {
        for(int x=1;x<im.cols-1;x++)
        {
            if(im.at<uchar>(y,x)==255)
            {
                for(int t=0;t<angleBins;t++)
                {
                    const double r=(x-cenx)*cosTable[t]+(y-ceny)*sinTable[t]+max_r;
                    acc.at<int>(t,int(r))++;
                }
            }
        }
    }

    // Search the raw vote counts: quantising the accumulator to 8 bits first
    // can collapse distinct counts into ties and move the reported peak.
    /*debug (normalize a copy of acc to 0..255 and convert it to 8 bit first)
    Mat cmap;
    applyColorMap(acc8,cmap,COLORMAP_JET);
    imshow("cmap",cmap);
    imshow("acc",acc8);*/
    Point maxLoc;
    minMaxLoc(acc,0,0,0,&maxLoc);
    const double theta = (double)maxLoc.y/angleBins*CV_PI;
    const double rho = maxLoc.x-max_r;
    if(std::abs(sin(theta))<0.000001)//check vertical
    {
        //when vertical, line equation becomes
        //x = rho
        Point2d p1 = Point2d(rho+im.cols/2,0);
        Point2d p2 = Point2d(rho+im.cols/2,im.rows);
        line(orig,p1,p2,Scalar(0,0,255),1);
        *skew=90;
        cout<<"skew angle "<<" 90"<<endl;
    }else
    {
        //convert normal form back to slope intercept form
        //y = mx + b
        const double m = -cos(theta)/sin(theta);
        const double b = rho/sin(theta)+im.rows/2.-m*im.cols/2.;
        Point2d p1 = Point2d(0,b);
        Point2d p2 = Point2d(im.cols,im.cols*m+b);
        line(orig,p1,p2,Scalar(0,0,255),1);
        // p1 is the left end point (x = 0) and p2 the right one (x = cols),
        // so the inclination is always measured from p1 towards p2.
        const double skewangle = atan2(p2.y-p1.y,p2.x-p1.x)*180./CV_PI;
        *skew=skewangle;
        cout<<"skew angle "<<skewangle<<endl;
    }
    imshow("orig",orig);
}

// Edge detector: computes the squared 3x3 Sobel gradient magnitude of every
// interior pixel of `im` (read as uchar), rescales the result to 0..255 and
// returns the 8-bit image thresholded at 50 (values above 50 become 255).
Mat preprocess1(Mat& im)
{
    Mat magnitude = Mat::zeros(im.size(),CV_32SC1);

    for(int row=1;row<im.rows-1;row++)
    {
        for(int col=1;col<im.cols-1;col++)
        {
            // The eight neighbours of (row, col), named by compass direction.
            const int nw = im.at<uchar>(row-1,col-1);
            const int n  = im.at<uchar>(row-1,col);
            const int ne = im.at<uchar>(row-1,col+1);
            const int w  = im.at<uchar>(row,col-1);
            const int e  = im.at<uchar>(row,col+1);
            const int sw = im.at<uchar>(row+1,col-1);
            const int s  = im.at<uchar>(row+1,col);
            const int se = im.at<uchar>(row+1,col+1);

            // Sobel responses: right-minus-left and bottom-minus-top.
            const int horizontal = (ne-nw)+2*(e-w)+(se-sw);
            const int vertical = (sw-nw)+2*(s-n)+(se-ne);

            magnitude.at<int>(row,col)=horizontal*horizontal+vertical*vertical;
        }
    }

    // Rescale to the 8-bit range, then binarize.
    normalize(magnitude,magnitude,255,0,NORM_MINMAX);
    magnitude.convertTo(magnitude,CV_8UC1);
    threshold(magnitude,magnitude,50,255,THRESH_BINARY);
    return magnitude;
}

// Selects the pixels that are allowed to vote in the Hough transform.
//
// The image is binarized with a local (adaptive Gaussian) threshold; it is
// assumed to show white content on a black background. A pixel is then kept
// (set to 255 in the result) only when it is white, the three pixels in the
// row above it are all white, and at least one of the three pixels in the
// row below it is black, i.e. it lies on the bottom edge of a white region
// such as the bottom of a text line. The one-pixel border stays 0.
Mat preprocess2(Mat& im)
{
    Mat binary = im.clone();
    adaptiveThreshold(binary,binary,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);

    Mat bottomEdges = Mat::zeros(im.size(),CV_8UC1);
    for(int row=1;row<binary.rows-1;row++)
    {
        for(int col=1;col<binary.cols-1;col++)
        {
            if(binary.at<uchar>(row,col)!=255)
                continue;

            // Any black pixel directly above disqualifies this pixel.
            const bool aboveAllWhite = binary.at<uchar>(row-1,col)!=0
                && binary.at<uchar>(row-1,col-1)!=0
                && binary.at<uchar>(row-1,col+1)!=0;
            if(!aboveAllWhite)
                continue;

            // At least one black pixel directly below is required.
            const bool belowHasBlack = binary.at<uchar>(row+1,col)==0
                || binary.at<uchar>(row+1,col-1)==0
                || binary.at<uchar>(row+1,col+1)==0;
            if(belowHasBlack)
                bottomEdges.at<uchar>(row,col) = 255;
        }
    }
    return bottomEdges;
}
/// Rotates `im` by `thetaRad` and returns the result on a canvas sized to
/// the bounding box of the rotated image, so no corner is cut off.
///
/// @param im        Image to rotate; it is not modified.
/// @param thetaRad  Rotation angle in radians.
/// @return          The rotated image, resampled with Lanczos interpolation.
Mat rot(Mat& im,double thetaRad)
{
    // Width and height of the axis-aligned box around the rotated image.
    const double absSin = std::abs(sin(thetaRad));
    const double absCos = std::abs(cos(thetaRad));
    const double nw = absSin*im.rows+absCos*im.cols;
    const double nh = absCos*im.rows+absSin*im.cols;

    // getRotationMatrix2D takes its angle in degrees; rotate about the
    // centre of the enlarged canvas.
    cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);

    // The source image has to be centred on the larger canvas. Push that
    // offset through the rotation (third component 0, so the matrix's own
    // translation is ignored) and add it to the translation column.
    Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
    pos.at<double>(0)=(nw-im.cols)*.5;
    pos.at<double>(1)=(nh-im.rows)*.5;
    Mat res = rot_mat*pos;
    rot_mat.at<double>(0,2) += res.at<double>(0);
    rot_mat.at<double>(1,2) += res.at<double>(1);

    cv::Mat rotated;
    cv::warpAffine(im, rotated, rot_mat,Size(static_cast<int>(nw),static_cast<int>(nh)), cv::INTER_LANCZOS4);
    return rotated;
}

int main(int argc, char** argv)
{
    string src="C:/data/skew.png";
    Mat im= imread(src);
    Mat gray;
    cvtColor(im,gray,CV_BGR2GRAY);

    Mat preprocessed = preprocess2(gray);
    imshow("preprocessed2",preprocessed);
    double skew;
    hough_transform(preprocessed,im,&skew);
    Mat rotated = rot(im,skew* CV_PI/180);
    imshow("corrected",rotated);

    waitKey(0);
    return 0;
}
Papoose answered 9/6, 2014 at 13:42 Comment(11)
I change my idea your pictures is good. But code is not. Rotate my images with wrong angles...Salamanca
can you post the images? maybe i can see if they can be made to work at allPapoose
Thaks for your reply. Can you help me for Crop Text like that #23125859 I cant implement that link to my codes. May be after i need rotate again.Salamanca
hmm..it's a different problem. the answer i provided assume that the image is already cropped around the text. it only concerns itself with finding the rotation angle. according to my experience, it works best with real camera images as input..not so well with synthetic or already processed images. you can tweak the function preprocess2(Mat& im) to fit your needs. for actual cropping of images, you would need to find a different algorithmPapoose
It's better than this one: felix.abecassis.me/2011/09/opencv-detect-skew-angle Thanks!Angelikaangelina
Thanks a lot. This is great. But I am afraid, there is no any usage of the function preprocess1. Can u explain it? Or is that a mistake?Sathrum
It's just an alternative to preprocess2. You can use that or use preprocess2 and the results will be different(perhaps better depending on your application. For detecting text, preprocess2 is better.). Note that preprocess1 is just a standard edge detector.Papoose
This code is great, but I'm wondering why the skew angle is always rounded to the nearest degree.Vertievertiginous
Hi, that's because of 180 bins are allocated(one bin for each angle) when calculating hough transform. You can increase it to higher bins to calculate finer angles though I doubt whether it has meaningful impact on performance.Papoose
Thanks but it seems that when you increase the number of bins you also get slower processing time. Any ideas?Vertievertiginous
Yes that's correct. Performance is quadratic to the number of bins(I think). I have not really focused on performance on this. If you want faster, you can take a look at opencv implementation although theirs might be harder to understand. Or you can take a look at implementing this using simd instructions like sse.Papoose
H
2

The approach you posted has its own "ideal binarization" assumption: the threshold value directly affects the result. Use an Otsu threshold, or consider a DFT-based approach for a more generic solution.

otsu trial:

// Otsu trial: binarizes the image with an automatically chosen (Otsu)
// threshold, fits a minimum-area rectangle around the foreground pixels and
// displays it. Returns 1 if the image cannot be read or contains no
// foreground pixel after binarization.
int main()
{
    Mat input = imread("your text");
    if (input.empty())
        return 1;   // imread yields an empty Mat on failure; cvtColor would throw on it
    cvtColor(input, input, CV_BGR2GRAY);
    Mat img;
    // With THRESH_OTSU the threshold is chosen automatically.
    cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);

    // Invert so the dark content becomes the non-zero foreground.
    cv::bitwise_not(img, img);
    imshow("img ", img);
    waitKey(0);

    vector<Point> points;
    findNonZero(img, points);
    if (points.empty())
        return 1;   // nothing to fit a rectangle to
    cv::RotatedRect box = cv::minAreaRect(points);

    // Fold angles below -45 degrees back by a quarter turn.
    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;

    // Draw the fitted rectangle and show the result.
    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));
    imshow("img ", img);
    waitKey(0);

    return 0;
}

enter image description here

Humid answered 4/6, 2014 at 22:23 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.