summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--main.c93
-rw-r--r--rabincarp.c190
-rw-r--r--rabincarp.h36
3 files changed, 316 insertions, 3 deletions
diff --git a/main.c b/main.c
index dc57337..9ab58fe 100644
--- a/main.c
+++ b/main.c
@@ -64,6 +64,8 @@ typedef struct match_params
{
char *fname;
char *match_str;
+ int match_len;
+ int buf_size;
int verbose;
} match_params;
@@ -73,17 +75,102 @@ void helper( char *progname )
printf("Usage: %s [OPTS]\n\n"
" -f - input file\n"
" -m - match string\n"
+ //" -b - set buffer size to read\n"
" -v - more extra info in output\n"
"Version: 0.0.1 \n"
"\n"
, progname);
}
-int main(int argc, const char *argv[])
+int main(int argc, char **argv)
{
-
+ int c;
+ int i;
+ int fd;
+ int text_len;
+ int ret;
+
+ char *text = NULL;
+
+ //search
+ rol_hash_isearch search;
+
+ //parametrs
+ match_params mp;
+
+ memset(&mp,0,sizeof(match_params));
+
+ while ( (c = getopt(argc, argv, "f:m:b:v")) != -1 )
+ {
+ switch(c)
+ {
+ case 'f':
+ mp.fname = optarg;
+ break;
+ case 'm':
+ mp.match_str = optarg;
+ mp.match_len = strlen(optarg);
+ break;
+ case 'b':
+ mp.buf_size = atoi(optarg);
+ break;
+ case 'v':
+ mp.verbose = 1;
+ break;
+ default:
+ helper( argv[0] );
+ exit(1);
+ }
+ }
+
+ if (argc<2)
+ {
+ helper(argv[0]);
+ return -1;
+ }
+
+
+ if (mp.fname == NULL)
+ {
+ printf("Set filename\n");
+ return -1;
+ }
+
+ if (mp.match_str == NULL)
+ {
+ printf("Set search string\n");
+ return -1;
+ }
+
+ fd = file_open( mp.fname );
+ if (fd<0)
+ {
+ printf("Couldnt open file %s\n", mp.fname);
+
+ return -1;
+ }
+
+ text_len = lseek(fd,0,SEEK_END);
+ lseek(fd, 0,SEEK_SET);
+ text = malloc(text_len);
+ //printf("Read %d bytes\n",read(fd,text,text_len));
+
+ ret = read(fd, text, text_len);
+ if (mp.verbose)
+ {
+ printf("Read %d bytes\n",ret);
+ }
+
+
+ rlsi_hash_init( &search, 10, 13);
+ while ((i = rlsi_hash_search(&search, mp.match_str, mp.match_len, text, text_len)) == 1)
+ {
+ //printf("Found smth %d\n",rlsi_hash_get(&search));
+ printf("%d\n",rlsi_hash_get(&search));
+ }
+
+ file_close( fd );
-
return 0;
} \ No newline at end of file
diff --git a/rabincarp.c b/rabincarp.c
index e69de29..a58ab15 100644
--- a/rabincarp.c
+++ b/rabincarp.c
@@ -0,0 +1,190 @@
+#include "rabincarp.h"
+
+
+int rlsi_hash_reset(rol_hash_isearch *rlh)
+{
+ rlh->pow = 1;
+ rlh->pattern_len = 0;
+ rlh->pattern_hash = 0;
+ rlh->text_hash = 0;
+ rlh->text_len = 0;
+ rlh->text = NULL;
+ rlh->pattern = NULL;
+ rlh->iter_pos = 0;
+ rlh->count = 0;
+ rlh->pre_text = NULL;
+ rlh->chunk_done = 0;
+ return 0;
+}
+
+int rlsi_hash_init(rol_hash_isearch *rlh, int b, int m)
+{
+ rlh->b = b;
+ rlh->m = m;
+ rlsi_hash_reset(rlh);
+ return 0;
+}
+
+int rlsi_hash_precalc(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen)
+{
+ int i;
+
+ if (tlen<plen)
+ {
+ return -1;
+ }
+
+ rlh->text_len = tlen;
+ rlh->text = text;
+ rlh->pattern = pattern;
+ rlh->pattern_len = plen;
+
+ rlh->pow = 1;
+ for (i=0; i<plen-1; i++)
+ {
+ rlh->pow = (rlh->pow * rlh->b) % rlh->m;
+ }
+
+ rlh->text_hash = 0;
+ rlh->pattern_hash = 0;
+ for (i=0;i<plen;i++)
+ {
+ rlh->pattern_hash = (rlh->pattern_hash * rlh->b + pattern[i]) % rlh->m;
+ rlh->text_hash = (rlh->text_hash * rlh->b + text[i]) % rlh->m;
+ }
+
+ //printf("pow %d\n",rlh->pow);
+ //printf("pattern %d\n",rlh->pattern_hash);
+ //printf("text %d\n",rlh->text_hash);
+
+ //set buffer size for presaved text
+ rlh->pre_text = malloc(plen);//dont check err =P, mem leak here
+ rlh->count = 0;
+
+ return 0;
+}
+
+//run after prepararion step
+int rlsi_hash_search(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen)
+{
+ int j=0;
+
+ int found=0;
+
+ if ((rlh->text == NULL) && (rlh->pattern == NULL))
+ {
+ rlsi_hash_precalc(rlh, pattern, plen, text, tlen);
+ }
+
+ if (rlh->chunk_done == 1)
+ {
+ rlh->chunk_done = 0;
+ rlh->text_len = tlen;
+ }
+
+ //printf("while %d",rlh->text_len-rlh->pattern_len);
+ //while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len))
+ while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len))
+ {
+
+ //hashes are matching
+ if (rlh->text_hash == rlh->pattern_hash)
+ {
+ //printf("2\n");
+ //check if string match
+ for (j=0;j<rlh->pattern_len;j++)
+ {
+ //???
+ char i_c=0;
+ int k=rlh->iter_pos+j;
+ if (k<0)
+ {
+ i_c = rlh->pre_text[rlh->pattern_len+k];
+ } else
+ {
+ //i_c = rlh->text[rlh->iter_pos+j];
+ i_c = text[rlh->iter_pos+j];
+ }
+ if (i_c != rlh->pattern[j])
+ break;
+ }
+
+ if (j == rlh->pattern_len)
+ {
+ //???
+ //printf("Found pattern at %d\n",rlh->iter_pos+1);
+ //printf("Found pattern at %d\n",rlh->count);
+ found = 1;
+ }
+
+ }
+
+ //???
+ if (rlh->iter_pos < rlh->text_len - rlh->pattern_len)
+ {
+ //printf("1\n");
+ //???
+ char i_c;
+ if (rlh->iter_pos < 0)
+ {
+ i_c = rlh->pre_text[rlh->pattern_len+rlh->iter_pos];
+ } else
+ {
+ //i_c = rlh->text[rlh->iter_pos];
+ i_c = text[rlh->iter_pos];
+ }
+ //printf("CHAR: %c\n",i_c);
+ //rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + rlh->text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m;
+ rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m;
+ //printf("text %d\n",rlh->text_hash);
+ if (rlh->text_hash < 0)
+ {
+ rlh->text_hash = rlh->text_hash + rlh->m;
+ }
+ }
+
+ //iter over current buffer
+ rlh->iter_pos += 1;
+ //total data that have been iterated
+ rlh->count += 1;
+
+ if (found == 1)
+ {
+ //rlh->count -= 1;
+ return 1;
+ }
+ //iter over current buffer
+ //rlh->iter_pos += 1;
+ //total data that have been iterated
+ //rlh->count += 1;
+ //printf("rlh->iter_pos %d\n",rlh->iter_pos);
+ //printf("rlh->count %d\n",rlh->count);
+ }
+ //printf("tlen %d\n",tlen);
+ //exit from while loop because end of the buffer, iter is +1 element larger
+ //copy pattern to intermidiate buffer
+ //if ((rlh->iter_pos)==(rlh->text_len - rlh->pattern_len+1))
+ {
+ //copy last bit of buffer to further processing
+ //memcpy(rlh->pre_text, &rlh->text[rlh->iter_pos], rlh->pattern_len);
+ memcpy(rlh->pre_text, &text[rlh->iter_pos-1], rlh->pattern_len);
+ //printf("end of chunk\n");
+ //rlh->count -= 1;
+ //printf("count %d\n",rlh->count);
+
+ rlh->chunk_done = 1;
+ rlh->iter_pos = -rlh->pattern_len;
+ }
+
+//exit_loop:
+ //chunk finished search new need to be supplied pal
+ //printf("0\n");
+ rlh->count -= 1;
+ return 0;
+}
+
+int rlsi_hash_get(rol_hash_isearch *rlh)
+{
+ //return rlh->iter_pos;
+ return rlh->count;
+} \ No newline at end of file
diff --git a/rabincarp.h b/rabincarp.h
index e69de29..753ff66 100644
--- a/rabincarp.h
+++ b/rabincarp.h
@@ -0,0 +1,36 @@
+#ifndef __RABINCARP_H
+#define __RABINCARP_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+typedef struct rol_hash_isearch
+{
+ int b;
+ int m;
+ int pow;
+
+ int pattern_len;
+ int text_len;
+ int pattern_hash;
+ int text_hash;
+ char *text;
+ char *pattern;
+ int iter_pos;
+
+ char *pre_text;
+ int count;
+ int chunk_done;
+} rol_hash_isearch;
+
+int rlsi_hash_reset(rol_hash_isearch *rlh);
+int rlsi_hash_init(rol_hash_isearch *rlh, int b, int m);
+int rlsi_hash_precalc(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen);
+int rlsi_hash_search(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen);
+int rlsi_hash_get(rol_hash_isearch *rlh);
+
+#endif \ No newline at end of file