From a571b8c92b0ec149f7691e786671ac5939cbf3c5 Mon Sep 17 00:00:00 2001 From: dianshi Date: Sun, 10 May 2020 12:53:03 +0100 Subject: working version --- rabincarp.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) (limited to 'rabincarp.c') diff --git a/rabincarp.c b/rabincarp.c index e69de29..a58ab15 100644 --- a/rabincarp.c +++ b/rabincarp.c @@ -0,0 +1,190 @@ +#include "rabincarp.h" + + +int rlsi_hash_reset(rol_hash_isearch *rlh) +{ + rlh->pow = 1; + rlh->pattern_len = 0; + rlh->pattern_hash = 0; + rlh->text_hash = 0; + rlh->text_len = 0; + rlh->text = NULL; + rlh->pattern = NULL; + rlh->iter_pos = 0; + rlh->count = 0; + rlh->pre_text = NULL; + rlh->chunk_done = 0; + return 0; +} + +int rlsi_hash_init(rol_hash_isearch *rlh, int b, int m) +{ + rlh->b = b; + rlh->m = m; + rlsi_hash_reset(rlh); + return 0; +} + +int rlsi_hash_precalc(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen) +{ + int i; + + if (tlentext_len = tlen; + rlh->text = text; + rlh->pattern = pattern; + rlh->pattern_len = plen; + + rlh->pow = 1; + for (i=0; ipow = (rlh->pow * rlh->b) % rlh->m; + } + + rlh->text_hash = 0; + rlh->pattern_hash = 0; + for (i=0;ipattern_hash = (rlh->pattern_hash * rlh->b + pattern[i]) % rlh->m; + rlh->text_hash = (rlh->text_hash * rlh->b + text[i]) % rlh->m; + } + + //printf("pow %d\n",rlh->pow); + //printf("pattern %d\n",rlh->pattern_hash); + //printf("text %d\n",rlh->text_hash); + + //set buffer size for presaved text + rlh->pre_text = malloc(plen);//dont check err =P, mem leak here + rlh->count = 0; + + return 0; +} + +//run after prepararion step +int rlsi_hash_search(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen) +{ + int j=0; + + int found=0; + + if ((rlh->text == NULL) && (rlh->pattern == NULL)) + { + rlsi_hash_precalc(rlh, pattern, plen, text, tlen); + } + + if (rlh->chunk_done == 1) + { + rlh->chunk_done = 0; + rlh->text_len = tlen; + } + + //printf("while %d",rlh->text_len-rlh->pattern_len); + //while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len)) + while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len)) + { + + //hashes are matching + if (rlh->text_hash == rlh->pattern_hash) + { + //printf("2\n"); + //check if string match + for (j=0;jpattern_len;j++) + { + //??? + char i_c=0; + int k=rlh->iter_pos+j; + if (k<0) + { + i_c = rlh->pre_text[rlh->pattern_len+k]; + } else + { + //i_c = rlh->text[rlh->iter_pos+j]; + i_c = text[rlh->iter_pos+j]; + } + if (i_c != rlh->pattern[j]) + break; + } + + if (j == rlh->pattern_len) + { + //??? + //printf("Found pattern at %d\n",rlh->iter_pos+1); + //printf("Found pattern at %d\n",rlh->count); + found = 1; + } + + } + + //??? + if (rlh->iter_pos < rlh->text_len - rlh->pattern_len) + { + //printf("1\n"); + //??? + char i_c; + if (rlh->iter_pos < 0) + { + i_c = rlh->pre_text[rlh->pattern_len+rlh->iter_pos]; + } else + { + //i_c = rlh->text[rlh->iter_pos]; + i_c = text[rlh->iter_pos]; + } + //printf("CHAR: %c\n",i_c); + //rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + rlh->text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m; + rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m; + //printf("text %d\n",rlh->text_hash); + if (rlh->text_hash < 0) + { + rlh->text_hash = rlh->text_hash + rlh->m; + } + } + + //iter over current buffer + rlh->iter_pos += 1; + //total data that have been iterated + rlh->count += 1; + + if (found == 1) + { + //rlh->count -= 1; + return 1; + } + //iter over current buffer + //rlh->iter_pos += 1; + //total data that have been iterated + //rlh->count += 1; + //printf("rlh->iter_pos %d\n",rlh->iter_pos); + //printf("rlh->count %d\n",rlh->count); + } + //printf("tlen %d\n",tlen); + //exit from while loop because end of the buffer, iter is +1 element larger + //copy pattern to intermidiate buffer + //if ((rlh->iter_pos)==(rlh->text_len - rlh->pattern_len+1)) + { + //copy last bit of buffer to further processing + //memcpy(rlh->pre_text, &rlh->text[rlh->iter_pos], rlh->pattern_len); + memcpy(rlh->pre_text, &text[rlh->iter_pos-1], rlh->pattern_len); + //printf("end of chunk\n"); + //rlh->count -= 1; + //printf("count %d\n",rlh->count); + + rlh->chunk_done = 1; + rlh->iter_pos = -rlh->pattern_len; + } + +//exit_loop: + //chunk finished search new need to be supplied pal + //printf("0\n"); + rlh->count -= 1; + return 0; +} + +int rlsi_hash_get(rol_hash_isearch *rlh) +{ + //return rlh->iter_pos; + return rlh->count; +} \ No newline at end of file -- cgit v1.2.3