How do I do the in-place equivalent of strstr()
for a counted string (i.e. not null-terminated) in C?
If you're afraid of O(m*n) behaviour - you needn't be, since such cases don't occur naturally - here's a KMP implementation I had lying around, which I've modified to take the length of the haystack, along with a wrapper. If you want to do repeated searches, write your own wrapper and reuse the borders array.
No guarantees for bug-freeness, but it seems to still work.
/*
 * Build the KMP border table for the first nlen bytes of needle.
 *
 * The table has nlen + 1 entries. Entry 0 is the sentinel -1; entry k
 * (k >= 1) is the length of the longest proper prefix of needle[0..k)
 * that is also a suffix of it.
 *
 * Returns a malloc'd array that the caller must free(), or NULL when
 * needle is NULL or the allocation fails.
 */
int *kmp_borders(char *needle, size_t nlen){
    int *table;
    int pos = 0;
    int border = -1;

    if (needle == NULL) {
        return NULL;
    }

    table = malloc((nlen + 1) * sizeof(*table));
    if (table == NULL) {
        return NULL;
    }

    table[pos] = border;
    while ((size_t)pos < nlen) {
        /* Fall back through successively shorter borders until one can be
         * extended by needle[pos], or none is left (border == -1). */
        for (; border >= 0; border = table[border]) {
            if (needle[pos] == needle[border]) {
                break;
            }
        }
        pos++;
        border++;
        table[pos] = border;
    }

    return table;
}
/*
 * Knuth-Morris-Pratt search for needle[0..nlen) within the first haylen
 * bytes of haystack.
 *
 * borders is the border table for this needle (nlen + 1 entries), such as
 * the one built by kmp_borders().
 *
 * Returns a pointer to the first match inside haystack, or NULL when there
 * is none. A needle longer than the haystack never matches. A NUL byte in
 * the haystack ends the search, so nothing at or after it is matched. An
 * empty needle (nlen == 0) matches at haystack itself.
 */
char *kmp_search(char *haystack, size_t haylen, char *needle, size_t nlen, int *borders){
    /* Without this guard haylen - nlen wraps around to a huge value and the
     * loop below would read (and could report a match) past the end of the
     * counted haystack. */
    if (haylen < nlen) {
        return NULL;
    }

    /* Invariant: haystack points at offset i + j of the original buffer,
     * where i is the candidate match start and j the number of needle bytes
     * already matched. */
    size_t max_index = haylen - nlen, i = 0, j = 0;
    while (i <= max_index) {
        /* Extend the current partial match as far as possible. */
        while (j < nlen && *haystack && needle[j] == *haystack) {
            ++j;
            ++haystack;
        }
        if (j == nlen) {
            return haystack - nlen;
        }
        if (!(*haystack)) {
            /* Hit a NUL byte before the needle was complete. */
            return NULL;
        }
        if (j == 0) {
            /* Mismatch on the very first needle byte: slide by one. */
            ++haystack;
            ++i;
        } else {
            /* Shift the candidate start so that the longest border of the
             * matched prefix lines up with the text already consumed; the
             * haystack pointer itself does not move (i + j is unchanged). */
            do {
                i += j - (size_t)borders[j];
                j = borders[j];
            } while (j > 0 && needle[j] != *haystack);
        }
    }
    return NULL;
}
/*
 * Find the NUL-terminated string needle within the first haylen bytes of
 * haystack, using kmp_borders() and kmp_search().
 *
 * Returns whatever kmp_search() reports (a pointer to the match, or NULL).
 * Also returns NULL when either pointer is NULL, when the needle is longer
 * than haylen, or when kmp_borders() fails to produce a table.
 */
char *sstrnstr(char *haystack, char *needle, size_t haylen){
    char *found = NULL;

    if (haystack != NULL && needle != NULL) {
        size_t needle_len = strlen(needle);

        if (needle_len <= haylen) {
            int *table = kmp_borders(needle, needle_len);

            if (table != NULL) {
                found = kmp_search(haystack, haylen, needle, needle_len, table);
                /* The table is only needed for this one search. */
                free(table);
            }
        }
    }

    return found;
}
See if the function below works for you. I haven't tested it thoroughly, so I would suggest you do so.
/*
 * Find the NUL-terminated string needle within the first length bytes of
 * haystack (haystack itself need not be NUL-terminated).
 *
 * Returns a pointer to the first occurrence, or NULL when there is none,
 * when the needle is longer than length, or when either pointer is NULL.
 * An empty needle matches at haystack, including when length is 0, which
 * mirrors strstr().
 */
char *sstrstr(char *haystack, char *needle, size_t length)
{
    if (haystack == NULL || needle == NULL) {
        return NULL;
    }

    size_t needle_length = strlen(needle);
    if (needle_length > length) {
        return NULL;
    }

    /* Last offset at which the whole needle still fits; computing it once
     * keeps the bound check out of the loop body. */
    size_t last_start = length - needle_length;

    for (size_t i = 0; i <= last_start; i++) {
        /* Every window is fully inside the counted buffer, so a plain byte
         * comparison is enough; no NUL checks are needed. */
        if (memcmp(&haystack[i], needle, needle_length) == 0) {
            return &haystack[i];
        }
    }
    return NULL;
}
strstr
is O(m + n). So I'm looking for something that's not ridiculously slow like my version. :-) But +1 anyway, since the idea works. –
Midyear strstr
is typically defined to be an O(mn) operation?? Thanks for pointing that out... then I'll probably accept this in a bit, since it's the exact substitute for the question. –
Midyear strstr()
. –
Weatherley if (i + needle_length > length)
by just reducing length
appropriately so the for
loop condition is correct. if (needle_length > length) return NULL;
, then length -= needle_length;
, and the for
loop becomes for (i = 0; i <= length; i++)
(changing <
to <=
to ensure you check the last possible position). The first if
check is only necessary because you use size_t
; using ssize_t
would remove the need for that check. Good compiler might be smart enough to eliminate if
for you, but why write complex code and hope if you don't need to. –
Imbecilic memcmp
to avoid NUL
checking of strncmp
. 2) Using memchr
to find the first character of the needle in the haystack, so you're not making a strncmp
/memcmp
function call for every character (likely reducing the number of calls by a factor of 50x-100x). Or skip memchr and just test the first character manually before calling strncmp
/memcmp
. –
Imbecilic If you're afraid of O(m*n) behaviour - basically, you needn't, such cases don't occur naturally - here's a KMP implementation I had lying around which I've modified to take the length of the haystack. Also a wrapper. If you want to do repeated searches, write your own and reuse the borders
array.
No guarantees for bug-freeness, but it seems to still work.
/*
 * Build the KMP border table for the first nlen bytes of needle.
 *
 * The table has nlen + 1 entries. Entry 0 is the sentinel -1; entry k
 * (k >= 1) is the length of the longest proper prefix of needle[0..k)
 * that is also a suffix of it.
 *
 * Returns a malloc'd array that the caller must free(), or NULL when
 * needle is NULL or the allocation fails.
 */
int *kmp_borders(char *needle, size_t nlen){
    int *table;
    int pos = 0;
    int border = -1;

    if (needle == NULL) {
        return NULL;
    }

    table = malloc((nlen + 1) * sizeof(*table));
    if (table == NULL) {
        return NULL;
    }

    table[pos] = border;
    while ((size_t)pos < nlen) {
        /* Fall back through successively shorter borders until one can be
         * extended by needle[pos], or none is left (border == -1). */
        for (; border >= 0; border = table[border]) {
            if (needle[pos] == needle[border]) {
                break;
            }
        }
        pos++;
        border++;
        table[pos] = border;
    }

    return table;
}
/*
 * Knuth-Morris-Pratt search for needle[0..nlen) within the first haylen
 * bytes of haystack.
 *
 * borders is the border table for this needle (nlen + 1 entries), such as
 * the one built by kmp_borders().
 *
 * Returns a pointer to the first match inside haystack, or NULL when there
 * is none. A needle longer than the haystack never matches. A NUL byte in
 * the haystack ends the search, so nothing at or after it is matched. An
 * empty needle (nlen == 0) matches at haystack itself.
 */
char *kmp_search(char *haystack, size_t haylen, char *needle, size_t nlen, int *borders){
    /* Without this guard haylen - nlen wraps around to a huge value and the
     * loop below would read (and could report a match) past the end of the
     * counted haystack. */
    if (haylen < nlen) {
        return NULL;
    }

    /* Invariant: haystack points at offset i + j of the original buffer,
     * where i is the candidate match start and j the number of needle bytes
     * already matched. */
    size_t max_index = haylen - nlen, i = 0, j = 0;
    while (i <= max_index) {
        /* Extend the current partial match as far as possible. */
        while (j < nlen && *haystack && needle[j] == *haystack) {
            ++j;
            ++haystack;
        }
        if (j == nlen) {
            return haystack - nlen;
        }
        if (!(*haystack)) {
            /* Hit a NUL byte before the needle was complete. */
            return NULL;
        }
        if (j == 0) {
            /* Mismatch on the very first needle byte: slide by one. */
            ++haystack;
            ++i;
        } else {
            /* Shift the candidate start so that the longest border of the
             * matched prefix lines up with the text already consumed; the
             * haystack pointer itself does not move (i + j is unchanged). */
            do {
                i += j - (size_t)borders[j];
                j = borders[j];
            } while (j > 0 && needle[j] != *haystack);
        }
    }
    return NULL;
}
/*
 * Find the NUL-terminated string needle within the first haylen bytes of
 * haystack, using kmp_borders() and kmp_search().
 *
 * Returns whatever kmp_search() reports (a pointer to the match, or NULL).
 * Also returns NULL when either pointer is NULL, when the needle is longer
 * than haylen, or when kmp_borders() fails to produce a table.
 */
char *sstrnstr(char *haystack, char *needle, size_t haylen){
    char *found = NULL;

    if (haystack != NULL && needle != NULL) {
        size_t needle_len = strlen(needle);

        if (needle_len <= haylen) {
            int *table = kmp_borders(needle, needle_len);

            if (table != NULL) {
                found = kmp_search(haystack, haylen, needle, needle_len, table);
                /* The table is only needed for this one search. */
                free(table);
            }
        }
    }

    return found;
}
I just came across this and I'd like to share my implementation. I think it is quite fast, as it doesn't make any sub-calls.
It returns the index in the haystack where the needle is found or -1 if it was not found.
/*
 * Linear search for a byte sequence in memory.
 *
 * Tries every start offset in hay at which needlesize bytes still fit
 * within haysize and compares byte by byte.
 *
 * Returns the offset of the first match in hay, or -1 if there is none.
 */
int memsearch(const char *hay, int haysize, const char *needle, int needlesize) {
    /* Highest start offset at which the whole needle still fits. */
    const int last_start = haysize - needlesize;
    int start = 0;

    while (start <= last_start) {
        int matched = 0;

        /* Count how many leading needle bytes agree at this offset. */
        while (matched < needlesize && hay[start + matched] == needle[matched]) {
            matched++;
        }
        if (matched == needlesize) {
            return start;
        }
        start++;
    }
    return -1;
}
I used this method
/*
 * Linear search for the targetLen bytes at target within the first
 * datasetLength bytes of dataset.
 *
 * Returns the offset of the first match, or -1 when there is none, when
 * either pointer is NULL, or when either length is negative. An empty
 * target (targetLen == 0) matches at offset 0.
 */
int memsearch(char* dataset, int datasetLength, char* target, int targetLen){
    if (dataset == NULL || target == NULL || datasetLength < 0 || targetLen < 0) {
        return -1;
    }
    if (targetLen == 0) {
        /* Nothing to compare; in particular target[0] must not be read. */
        return 0;
    }

    /* Highest start offset at which the whole target still fits. Bounding
     * the outer loop here removes the per-byte range check. It is negative
     * (so the loop never runs) when the target is longer than the dataset. */
    int lastStart = datasetLength - targetLen;

    for (int i = 0; i <= lastStart; i++) {
        /* Cheap first-byte filter before comparing the rest. */
        if (dataset[i] != target[0]) {
            continue;
        }
        int j = 1;
        while (j < targetLen && dataset[i + j] == target[j]) {
            j++;
        }
        if (j == targetLen) {
            return i;
        }
    }
    return -1;
}
© 2022 - 2024 — McMap. All rights reserved.
strnstr()
from BSD. But look out for this bug: mikeash.com/pyblog/dont-use-strnstr.html – Sollie\0
s and is O(4*n). – Necropolism
th character.) – Fridafriday