strcmp implementation not working with special characters
Asked Answered
M

2

6

I'm trying to implement my own strcmp function, my strcmp bahaves differently when I use special characters.

#include <string.h>    

int my_strcmp(const char *s1, const char *s2)
{
    const char  *str1;
    const char  *str2;

    str1 = s1;
    str2 = s2;
    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (*str1 - *str2);
}

int main()
{
   char *src = "a§bcDef";
   char *des = "acbcDef";
   printf("%d %d\n", my_strcmp(des, src), strcmp(des, src));
   return(0);
}

OUTPUT

161 -95

Monochromat answered 15/5, 2016 at 10:18 Comment(4)
You need to return *str2 - *str1 methinks.Junina
@FUZxxl when I do that, I get -161 and -95, I'm not sure if, it is right because the sign are the same.Monochromat
If the signs match, your function works correctly.Junina
As FUZxxl said, if signs match your function works correctly. Return values of strcmp() are specified as being negative, zero, or positive. Specific negative or positive values are neither required not guaranteed.Mock
K
1

Here's what the standard says about strcmp, with a relevant section highlighted in bold:

The sign of a non-zero return value shall be determined by the sign of the difference between the values of the first pair of bytes (both interpreted as type unsigned char) that differ in the strings being compared.

Your code is taking the difference of the bytes as char, which if signed differs from the spec.

Instead:

return (unsigned char)(*str1) - (unsigned char)(*str2);

Here's some test cases for the original code (my_strcmp), the currently accepted answer of dasblinkenlight (my_strcmp1), and this answer (my_strcmp2). Only my_strcmp2 passes the tests.

#include <string.h>
#include <stdio.h>

int my_strcmp(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (*str1 - *str2);
}

int my_strcmp1(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (signed char)(*str1 - *str2);
}

int my_strcmp2(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (unsigned char)(*str1) - (unsigned char)(*str2);
}


int sgn(int a) {
    return a > 0 ? 1 : a < 0 ? -1 : 0;
}

#define TEST(sc, a, b) do { \
    if (sgn(sc(a, b)) != sgn(strcmp(a, b))) { \
        printf("%s(%s, %s) = %d, want %d\n", #sc, a, b, sc(a, b), strcmp((const char*)a, (const char*)b)); \
        fail = 1; \
    } } while(0)

int main(int argc, char *argv[]) {
    struct {
        const char *a;
        const char *b;
    }cases[] = {
        {"abc", "abc"},
        {"\x01", "\xff"},
        {"\xff", "\x01"},
        {"abc", "abd"},
        {"", ""},
    };
    int fail = 0;
    for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        TEST(my_strcmp, cases[i].a, cases[i].b);
        TEST(my_strcmp1, cases[i].a, cases[i].b);
        TEST(my_strcmp2, cases[i].a, cases[i].b);
    }
    return fail;
}

(note: I put in some explicit signed in the implementations so that the code can be tested on compilers with unsigned char). Also, sorry about the macro -- this was a quick hack to test!

Knowhow answered 15/5, 2016 at 11:0 Comment(2)
Nice, indeed ... :-)Duodenitis
And the only one disliking your macro seems to be the SO C source parser ... ;-)Duodenitis
H
3

char is signed in many implementations, and your strcmp implementation considers char values < 0 to be smaller than those greater than 0. Perhaps you want to compare the unsigned values instead.

const unsigned char *str1 = (unsigned char*) s1;
const unsigned char *str2 = (unsigned char*) s2;
Huertas answered 15/5, 2016 at 10:41 Comment(0)
K
1

Here's what the standard says about strcmp, with a relevant section highlighted in bold:

The sign of a non-zero return value shall be determined by the sign of the difference between the values of the first pair of bytes (both interpreted as type unsigned char) that differ in the strings being compared.

Your code is taking the difference of the bytes as char, which if signed differs from the spec.

Instead:

return (unsigned char)(*str1) - (unsigned char)(*str2);

Here's some test cases for the original code (my_strcmp), the currently accepted answer of dasblinkenlight (my_strcmp1), and this answer (my_strcmp2). Only my_strcmp2 passes the tests.

#include <string.h>
#include <stdio.h>

int my_strcmp(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (*str1 - *str2);
}

int my_strcmp1(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (signed char)(*str1 - *str2);
}

int my_strcmp2(const char *s1, const char *s2) {
    const signed char *str1 = (const signed char*)(s1);
    const signed char *str2 = (const signed char*)(s2);

    while ((*str1 == *str2) && *str1)
    {
        str1++;
        str2++;
    }
    return (unsigned char)(*str1) - (unsigned char)(*str2);
}


int sgn(int a) {
    return a > 0 ? 1 : a < 0 ? -1 : 0;
}

#define TEST(sc, a, b) do { \
    if (sgn(sc(a, b)) != sgn(strcmp(a, b))) { \
        printf("%s(%s, %s) = %d, want %d\n", #sc, a, b, sc(a, b), strcmp((const char*)a, (const char*)b)); \
        fail = 1; \
    } } while(0)

int main(int argc, char *argv[]) {
    struct {
        const char *a;
        const char *b;
    }cases[] = {
        {"abc", "abc"},
        {"\x01", "\xff"},
        {"\xff", "\x01"},
        {"abc", "abd"},
        {"", ""},
    };
    int fail = 0;
    for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        TEST(my_strcmp, cases[i].a, cases[i].b);
        TEST(my_strcmp1, cases[i].a, cases[i].b);
        TEST(my_strcmp2, cases[i].a, cases[i].b);
    }
    return fail;
}

(note: I put in some explicit signed in the implementations so that the code can be tested on compilers with unsigned char). Also, sorry about the macro -- this was a quick hack to test!

Knowhow answered 15/5, 2016 at 11:0 Comment(2)
Nice, indeed ... :-)Duodenitis
And the only one disliking your macro seems to be the SO C source parser ... ;-)Duodenitis

© 2022 - 2024 — McMap. All rights reserved.