summaryrefslogtreecommitdiff
path: root/rabincarp.c
diff options
context:
space:
mode:
Diffstat (limited to 'rabincarp.c')
-rw-r--r--rabincarp.c190
1 files changed, 190 insertions, 0 deletions
diff --git a/rabincarp.c b/rabincarp.c
index e69de29..a58ab15 100644
--- a/rabincarp.c
+++ b/rabincarp.c
@@ -0,0 +1,190 @@
+#include "rabincarp.h"
+
+
+int rlsi_hash_reset(rol_hash_isearch *rlh)
+{
+ rlh->pow = 1;
+ rlh->pattern_len = 0;
+ rlh->pattern_hash = 0;
+ rlh->text_hash = 0;
+ rlh->text_len = 0;
+ rlh->text = NULL;
+ rlh->pattern = NULL;
+ rlh->iter_pos = 0;
+ rlh->count = 0;
+ rlh->pre_text = NULL;
+ rlh->chunk_done = 0;
+ return 0;
+}
+
+int rlsi_hash_init(rol_hash_isearch *rlh, int b, int m)
+{
+ rlh->b = b;
+ rlh->m = m;
+ rlsi_hash_reset(rlh);
+ return 0;
+}
+
+int rlsi_hash_precalc(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen)
+{
+ int i;
+
+ if (tlen<plen)
+ {
+ return -1;
+ }
+
+ rlh->text_len = tlen;
+ rlh->text = text;
+ rlh->pattern = pattern;
+ rlh->pattern_len = plen;
+
+ rlh->pow = 1;
+ for (i=0; i<plen-1; i++)
+ {
+ rlh->pow = (rlh->pow * rlh->b) % rlh->m;
+ }
+
+ rlh->text_hash = 0;
+ rlh->pattern_hash = 0;
+ for (i=0;i<plen;i++)
+ {
+ rlh->pattern_hash = (rlh->pattern_hash * rlh->b + pattern[i]) % rlh->m;
+ rlh->text_hash = (rlh->text_hash * rlh->b + text[i]) % rlh->m;
+ }
+
+ //printf("pow %d\n",rlh->pow);
+ //printf("pattern %d\n",rlh->pattern_hash);
+ //printf("text %d\n",rlh->text_hash);
+
+ //set buffer size for presaved text
+ rlh->pre_text = malloc(plen);//dont check err =P, mem leak here
+ rlh->count = 0;
+
+ return 0;
+}
+
+//run after prepararion step
+int rlsi_hash_search(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen)
+{
+ int j=0;
+
+ int found=0;
+
+ if ((rlh->text == NULL) && (rlh->pattern == NULL))
+ {
+ rlsi_hash_precalc(rlh, pattern, plen, text, tlen);
+ }
+
+ if (rlh->chunk_done == 1)
+ {
+ rlh->chunk_done = 0;
+ rlh->text_len = tlen;
+ }
+
+ //printf("while %d",rlh->text_len-rlh->pattern_len);
+ //while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len))
+ while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len))
+ {
+
+ //hashes are matching
+ if (rlh->text_hash == rlh->pattern_hash)
+ {
+ //printf("2\n");
+ //check if string match
+ for (j=0;j<rlh->pattern_len;j++)
+ {
+ //???
+ char i_c=0;
+ int k=rlh->iter_pos+j;
+ if (k<0)
+ {
+ i_c = rlh->pre_text[rlh->pattern_len+k];
+ } else
+ {
+ //i_c = rlh->text[rlh->iter_pos+j];
+ i_c = text[rlh->iter_pos+j];
+ }
+ if (i_c != rlh->pattern[j])
+ break;
+ }
+
+ if (j == rlh->pattern_len)
+ {
+ //???
+ //printf("Found pattern at %d\n",rlh->iter_pos+1);
+ //printf("Found pattern at %d\n",rlh->count);
+ found = 1;
+ }
+
+ }
+
+ //???
+ if (rlh->iter_pos < rlh->text_len - rlh->pattern_len)
+ {
+ //printf("1\n");
+ //???
+ char i_c;
+ if (rlh->iter_pos < 0)
+ {
+ i_c = rlh->pre_text[rlh->pattern_len+rlh->iter_pos];
+ } else
+ {
+ //i_c = rlh->text[rlh->iter_pos];
+ i_c = text[rlh->iter_pos];
+ }
+ //printf("CHAR: %c\n",i_c);
+ //rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + rlh->text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m;
+ rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m;
+ //printf("text %d\n",rlh->text_hash);
+ if (rlh->text_hash < 0)
+ {
+ rlh->text_hash = rlh->text_hash + rlh->m;
+ }
+ }
+
+ //iter over current buffer
+ rlh->iter_pos += 1;
+ //total data that have been iterated
+ rlh->count += 1;
+
+ if (found == 1)
+ {
+ //rlh->count -= 1;
+ return 1;
+ }
+ //iter over current buffer
+ //rlh->iter_pos += 1;
+ //total data that have been iterated
+ //rlh->count += 1;
+ //printf("rlh->iter_pos %d\n",rlh->iter_pos);
+ //printf("rlh->count %d\n",rlh->count);
+ }
+ //printf("tlen %d\n",tlen);
+ //exit from while loop because end of the buffer, iter is +1 element larger
+ //copy pattern to intermidiate buffer
+ //if ((rlh->iter_pos)==(rlh->text_len - rlh->pattern_len+1))
+ {
+ //copy last bit of buffer to further processing
+ //memcpy(rlh->pre_text, &rlh->text[rlh->iter_pos], rlh->pattern_len);
+ memcpy(rlh->pre_text, &text[rlh->iter_pos-1], rlh->pattern_len);
+ //printf("end of chunk\n");
+ //rlh->count -= 1;
+ //printf("count %d\n",rlh->count);
+
+ rlh->chunk_done = 1;
+ rlh->iter_pos = -rlh->pattern_len;
+ }
+
+//exit_loop:
+ //chunk finished search new need to be supplied pal
+ //printf("0\n");
+ rlh->count -= 1;
+ return 0;
+}
+
+int rlsi_hash_get(rol_hash_isearch *rlh)
+{
+ //return rlh->iter_pos;
+ return rlh->count;
+} \ No newline at end of file