aboutsummaryrefslogtreecommitdiff
path: root/semestr-5/so/lista7/so21_lista_7/hashdb.c
diff options
context:
space:
mode:
Diffstat (limited to 'semestr-5/so/lista7/so21_lista_7/hashdb.c')
-rw-r--r--semestr-5/so/lista7/so21_lista_7/hashdb.c207
1 files changed, 207 insertions, 0 deletions
diff --git a/semestr-5/so/lista7/so21_lista_7/hashdb.c b/semestr-5/so/lista7/so21_lista_7/hashdb.c
new file mode 100644
index 0000000..d63c32a
--- /dev/null
+++ b/semestr-5/so/lista7/so21_lista_7/hashdb.c
@@ -0,0 +1,207 @@
+#include "csapp.h"
+
+#define ENT_LENGTH 48
+
+#define DB_MODE (S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH)
+#define DB_PROT (PROT_READ | PROT_WRITE)
+
+typedef enum { DB_QUERY, DB_INSERT } op_t;
+typedef char entry_t[ENT_LENGTH];
+
+typedef struct db {
+ entry_t *entry;
+ ssize_t size;
+ char *name;
+} db_t;
+
+static bool db_action(db_t *db, const char *str, op_t op) {
+ /* Do not insert empty strings. Database doesn't store them. */
+ if (*str == '\0')
+ return op == DB_INSERT;
+ size_t len = strnlen(str, ENT_LENGTH);
+ int chain_len = max((db->size >> 14) - 1, 3);
+ uint32_t chain = jenkins_hash(str, len, HASHINIT);
+ for (int c = 0; c < chain_len; c++) {
+ size_t idx = (chain + c) % db->size;
+ if (!db->entry[idx][0] && op == DB_INSERT) {
+ strncpy(db->entry[idx], str, ENT_LENGTH);
+ return true;
+ }
+ if (strncmp(str, db->entry[idx], ENT_LENGTH) == 0)
+ return true;
+ }
+ return false;
+}
+
+#define db_query(db, key) db_action(db, key, DB_QUERY)
+#define db_maybe_insert(db, key) db_action(db, key, DB_INSERT)
+
+/* Open (`size` = 0) or create (`size` > 0) database from `name` file. */
+static void db_open(db_t *db, const char *name, size_t size) {
+ assert(powerof2(size));
+
+ int fd = Open(name, O_RDWR | O_CREAT | (size ? O_EXCL : 0), DB_MODE);
+
+ if (size == 0) {
+ struct stat sb;
+ Fstat(fd, &sb);
+ size = sb.st_size / sizeof(entry_t);
+ if (size == 0)
+ size = 1;
+ }
+
+ /* TODO: Setup DB structure, set file size and map the file into memory.
+ Inform OS that we're going to read DB in random order. */
+ db->size = size;
+ printf("Size: %ld\n", db->size * sizeof(entry_t));
+ Ftruncate(fd, db->size * sizeof(entry_t));
+ db->name = name;
+ db->entry = Mmap(NULL,
+ db->size * sizeof(entry_t),
+ DB_PROT,
+ MAP_SHARED,
+ fd,
+ 0);
+ Madvise(db->entry, db->size * sizeof(entry_t), MADV_RANDOM);
+ Close(fd);
+}
+
+/* Remove DB memory mapping and release associated memory. */
+static void db_close(db_t *db) {
+ Munmap(db->entry, db->size * sizeof(entry_t));
+ free(db->name);
+}
+
+/* Attempt to increase size of database. */
+static bool db_rehash(db_t *db, size_t new_size) {
+ assert(powerof2(new_size));
+
+ /* Create new database. */
+ db_t new[1];
+
+ char *name = alloca(strlen(db->name) + sizeof(".reshash") + 1);
+ strcpy(name, db->name);
+ strcat(name, ".rehash");
+ db_open(new, name, new_size);
+
+ /* Copy everything from old database to new database. */
+ /* TODO: Inform OS that we're going to read DB sequentially. */
+ Madvise(db->entry, db->size * sizeof(entry_t), MADV_SEQUENTIAL);
+ for (size_t i = 0; i < db->size; i++) {
+ if (!db_maybe_insert(new, db->entry[i])) {
+ /* Oops... rehashing failed. Need to increase db size and try again. */
+ /* TODO: Remove new database, since rehashing failed. */
+ Unlink(name);
+ Munmap(new->entry, new->size * sizeof(entry_t));
+ return false;
+ }
+ }
+
+ /* TODO Replace old database with new one, remove old database. */
+ // truncate(db->name, new->size * sizeof(entry_t));
+ // Munmap(db->entry, db->size * sizeof(entry_t));
+ // printf("Going to sleep for a minute\n");
+ // sleep(100);
+
+ /* Why this doesn't work?????? */
+ // Munmap(db->entry, db->size * sizeof(entry_t));
+ // Unlink(db->name);
+ // db_open(db, db->name, new->size);
+ // Rename(new->name, db->name);
+ // Munmap(new->entry, new->size * sizeof(entry_t));
+
+ Unlink(db->name);
+ Rename(new->name, db->name);
+ Munmap(db->entry, db->size * sizeof(entry_t));
+
+ db->entry = new->entry;
+ db->size = new->size;
+ return true;
+
+ return true;
+}
+
+/* Insert key into database and possibly increase DB size. */
+static void db_insert(db_t *db, const char *str) {
+ while (!db_maybe_insert(db, str)) {
+ size_t size = db->size;
+ do {
+ size *= 2;
+ fprintf(stderr, "db_rehash: %ld -> %ld\n", db->size, size);
+ } while (!db_rehash(db, size));
+ }
+}
+
+/* Read a key from buffer and perform `mode` operation of the database. */
+static char *consume_line(char *buf, db_t *db, op_t mode) {
+ /* Terminate string at new line character. */
+ char *end = strchr(buf, '\n');
+ if (end) {
+ if (buf < end && end[-1] == '\r')
+ end[-1] = '\0';
+ *end++ = '\0';
+ }
+
+ /* Skip if line is empty. */
+ if (*buf) {
+ if (mode == DB_INSERT)
+ db_insert(db, buf);
+ else if (mode == DB_QUERY)
+ puts(db_query(db, buf) ? "YES" : "NO");
+ }
+
+ /* Return pointer to next line or NULL. */
+ if (end == NULL) {
+ exit(EXIT_FAILURE);
+ }
+ return end;
+}
+
+static void doit(const char *path, op_t mode) {
+ db_t db;
+ db_open(&db, path, 0);
+
+ /* If input file is a terminal device then use standard reading technique. */
+ /* TODO: Use fstat instead to handle pipes correctly. */
+ struct stat sb;
+ Fstat(STDIN_FILENO, &sb);
+
+ if (S_ISCHR(sb.st_mode) || S_ISFIFO(sb.st_mode)) {
+ char buf[ENT_LENGTH + 1];
+ while (fgets(buf, ENT_LENGTH + 1, stdin))
+ consume_line(buf, &db, mode);
+ } else {
+ /* TODO: Map stdin into memory, and read it line by line. */
+ char *input = Mmap(NULL, sb.st_size + 1, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, STDIN_FILENO, 0);
+ printf("strlen: %ld\n", strlen(input));
+ while ((input = consume_line(input, &db, mode)));
+ }
+
+ db_close(&db);
+}
+
+static noreturn void usage(int argc, char **argv) {
+ app_error("Usage: %s [-i|-q] [FILE]", argv[0]);
+}
+
+int main(int argc, char **argv) {
+ op_t mode = -1;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "iq")) != -1) {
+ if (opt == 'i')
+ mode = DB_INSERT;
+ else if (opt == 'q')
+ mode = DB_QUERY;
+ else
+ usage(argc, argv);
+ }
+
+ if (optind >= argc || mode == -1)
+ usage(argc, argv);
+
+ doit(argv[optind], mode);
+
+ return 0;
+}