From e7fc88b3828ca8c2c6f2e1c598a5045ceb0ff659 Mon Sep 17 00:00:00 2001 From: Franciszek Malinka Date: Thu, 25 Nov 2021 23:15:50 +0100 Subject: SO update --- semestr-5/so/lista7/so21_lista_7.pdf | Bin 0 -> 307756 bytes semestr-5/so/lista7/so21_lista_7/forksort.c | 121 ++++++++++++++++ semestr-5/so/lista7/so21_lista_7/hashdb.c | 207 ++++++++++++++++++++++++++++ 3 files changed, 328 insertions(+) create mode 100644 semestr-5/so/lista7/so21_lista_7.pdf create mode 100644 semestr-5/so/lista7/so21_lista_7/forksort.c create mode 100644 semestr-5/so/lista7/so21_lista_7/hashdb.c (limited to 'semestr-5/so/lista7') diff --git a/semestr-5/so/lista7/so21_lista_7.pdf b/semestr-5/so/lista7/so21_lista_7.pdf new file mode 100644 index 0000000..698cee0 Binary files /dev/null and b/semestr-5/so/lista7/so21_lista_7.pdf differ diff --git a/semestr-5/so/lista7/so21_lista_7/forksort.c b/semestr-5/so/lista7/so21_lista_7/forksort.c new file mode 100644 index 0000000..e93ad8c --- /dev/null +++ b/semestr-5/so/lista7/so21_lista_7/forksort.c @@ -0,0 +1,121 @@ +#include "csapp.h" + +static void InsertionSort(long table[], size_t left, size_t right) { + for (size_t pivot = left + 1; pivot <= right; pivot++) { + size_t insert = left; + + while ((insert < pivot) && (table[insert] < table[pivot])) + insert++; + + if (insert < pivot) { + size_t n = pivot - insert; + long tmp = table[pivot]; + memmove(&table[insert + 1], &table[insert], n * sizeof(long)); + table[insert] = tmp; + } + } +} + +static void SwapElem(long table[], size_t i, size_t j) { + long tmp = table[i]; + table[i] = table[j]; + table[j] = tmp; +} + +static size_t Partition(long table[], size_t begin, size_t end, long pivot) { + size_t left = begin; + size_t right = end; + + while (left < right) { + while ((left < right) && (table[left] < pivot)) + left++; + + while ((left < right) && (table[right] >= pivot)) + right--; + + if (left < right) + SwapElem(table, left, right); + } + + return left; +} + +#define INSERTSORT_MAX 32 +#define FORKSORT_MIN (1L << 18) + +static int QuickSort(long table[], size_t left, size_t right) { + pid_t pid_left = -1, pid_right = -1, pid = -1; + + /* TODO: If there is more to sort than FORKSORT_MIN start a subprocess. */ + // bool forked = false; + // if (right - left >= FORKSORT_MIN) { + // if ((pid = Fork())) { + // return pid; + // } else { + // forked = true; + // } + // } + + if (left < right) { + if (right - left <= INSERTSORT_MAX) { + InsertionSort(table, left, right); + } else { + size_t pivot = left + random() % (right - left + 1); + size_t split = Partition(table, left, right, table[pivot]); + + if (left == split) { + SwapElem(table, left, pivot); + split++; + } else { + pid_left = QuickSort(table, left, split - 1); + } + + pid_right = QuickSort(table, split, right); + + /* TODO: Wait for possible children and exit if created a subprocess. */ + // if (pid_left != -1) { + // waitpid(pid_left, NULL, 0); + // } + // if (pid_right != -1) { + // waitpid(pid_right, NULL, 0); + // } + // if (forked) exit(EXIT_SUCCESS); + } + } + + return pid; +} + +#define NELEMS (1L << 26) + +int main(void) { + /* Table size in bytes must be divisible by page size. */ + size_t size = NELEMS * sizeof(long); + assert((size & (getpagesize() - 1)) == 0); + + /* Initialize PRNG seed. */ + struct timeval tv; + gettimeofday(&tv, NULL); + srandom(tv.tv_usec); + + /* TODO: Allocate table... */ + + long *table = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + /* ... and fill it in with random elements. */ + for (size_t i = 0; i < NELEMS; i++) + table[i] = random(); + + /* Sort it using hybrid algorithm... */ + if (QuickSort(table, 0, NELEMS - 1) > 0) + Wait(NULL); + + /* ... and verify if the table is actually sorted. */ + long prev = LONG_MIN; + for (size_t i = 0; i < NELEMS; i++) { + assert(prev <= table[i]); + prev = table[i]; + } + + return 0; +} diff --git a/semestr-5/so/lista7/so21_lista_7/hashdb.c b/semestr-5/so/lista7/so21_lista_7/hashdb.c new file mode 100644 index 0000000..d63c32a --- /dev/null +++ b/semestr-5/so/lista7/so21_lista_7/hashdb.c @@ -0,0 +1,207 @@ +#include "csapp.h" + +#define ENT_LENGTH 48 + +#define DB_MODE (S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH) +#define DB_PROT (PROT_READ | PROT_WRITE) + +typedef enum { DB_QUERY, DB_INSERT } op_t; +typedef char entry_t[ENT_LENGTH]; + +typedef struct db { + entry_t *entry; + ssize_t size; + char *name; +} db_t; + +static bool db_action(db_t *db, const char *str, op_t op) { + /* Do not insert empty strings. Database doesn't store them. */ + if (*str == '\0') + return op == DB_INSERT; + size_t len = strnlen(str, ENT_LENGTH); + int chain_len = max((db->size >> 14) - 1, 3); + uint32_t chain = jenkins_hash(str, len, HASHINIT); + for (int c = 0; c < chain_len; c++) { + size_t idx = (chain + c) % db->size; + if (!db->entry[idx][0] && op == DB_INSERT) { + strncpy(db->entry[idx], str, ENT_LENGTH); + return true; + } + if (strncmp(str, db->entry[idx], ENT_LENGTH) == 0) + return true; + } + return false; +} + +#define db_query(db, key) db_action(db, key, DB_QUERY) +#define db_maybe_insert(db, key) db_action(db, key, DB_INSERT) + +/* Open (`size` = 0) or create (`size` > 0) database from `name` file. */ +static void db_open(db_t *db, const char *name, size_t size) { + assert(powerof2(size)); + + int fd = Open(name, O_RDWR | O_CREAT | (size ? O_EXCL : 0), DB_MODE); + + if (size == 0) { + struct stat sb; + Fstat(fd, &sb); + size = sb.st_size / sizeof(entry_t); + if (size == 0) + size = 1; + } + + /* TODO: Setup DB structure, set file size and map the file into memory. + Inform OS that we're going to read DB in random order. */ + db->size = size; + printf("Size: %ld\n", db->size * sizeof(entry_t)); + Ftruncate(fd, db->size * sizeof(entry_t)); + db->name = name; + db->entry = Mmap(NULL, + db->size * sizeof(entry_t), + DB_PROT, + MAP_SHARED, + fd, + 0); + Madvise(db->entry, db->size * sizeof(entry_t), MADV_RANDOM); + Close(fd); +} + +/* Remove DB memory mapping and release associated memory. */ +static void db_close(db_t *db) { + Munmap(db->entry, db->size * sizeof(entry_t)); + free(db->name); +} + +/* Attempt to increase size of database. */ +static bool db_rehash(db_t *db, size_t new_size) { + assert(powerof2(new_size)); + + /* Create new database. */ + db_t new[1]; + + char *name = alloca(strlen(db->name) + sizeof(".reshash") + 1); + strcpy(name, db->name); + strcat(name, ".rehash"); + db_open(new, name, new_size); + + /* Copy everything from old database to new database. */ + /* TODO: Inform OS that we're going to read DB sequentially. */ + Madvise(db->entry, db->size * sizeof(entry_t), MADV_SEQUENTIAL); + for (size_t i = 0; i < db->size; i++) { + if (!db_maybe_insert(new, db->entry[i])) { + /* Oops... rehashing failed. Need to increase db size and try again. */ + /* TODO: Remove new database, since rehashing failed. */ + Unlink(name); + Munmap(new->entry, new->size * sizeof(entry_t)); + return false; + } + } + + /* TODO Replace old database with new one, remove old database. */ + // truncate(db->name, new->size * sizeof(entry_t)); + // Munmap(db->entry, db->size * sizeof(entry_t)); + // printf("Going to sleep for a minute\n"); + // sleep(100); + + /* Why this doesn't work?????? */ + // Munmap(db->entry, db->size * sizeof(entry_t)); + // Unlink(db->name); + // db_open(db, db->name, new->size); + // Rename(new->name, db->name); + // Munmap(new->entry, new->size * sizeof(entry_t)); + + Unlink(db->name); + Rename(new->name, db->name); + Munmap(db->entry, db->size * sizeof(entry_t)); + + db->entry = new->entry; + db->size = new->size; + return true; + + return true; +} + +/* Insert key into database and possibly increase DB size. */ +static void db_insert(db_t *db, const char *str) { + while (!db_maybe_insert(db, str)) { + size_t size = db->size; + do { + size *= 2; + fprintf(stderr, "db_rehash: %ld -> %ld\n", db->size, size); + } while (!db_rehash(db, size)); + } +} + +/* Read a key from buffer and perform `mode` operation of the database. */ +static char *consume_line(char *buf, db_t *db, op_t mode) { + /* Terminate string at new line character. */ + char *end = strchr(buf, '\n'); + if (end) { + if (buf < end && end[-1] == '\r') + end[-1] = '\0'; + *end++ = '\0'; + } + + /* Skip if line is empty. */ + if (*buf) { + if (mode == DB_INSERT) + db_insert(db, buf); + else if (mode == DB_QUERY) + puts(db_query(db, buf) ? "YES" : "NO"); + } + + /* Return pointer to next line or NULL. */ + if (end == NULL) { + exit(EXIT_FAILURE); + } + return end; +} + +static void doit(const char *path, op_t mode) { + db_t db; + db_open(&db, path, 0); + + /* If input file is a terminal device then use standard reading technique. */ + /* TODO: Use fstat instead to handle pipes correctly. */ + struct stat sb; + Fstat(STDIN_FILENO, &sb); + + if (S_ISCHR(sb.st_mode) || S_ISFIFO(sb.st_mode)) { + char buf[ENT_LENGTH + 1]; + while (fgets(buf, ENT_LENGTH + 1, stdin)) + consume_line(buf, &db, mode); + } else { + /* TODO: Map stdin into memory, and read it line by line. */ + char *input = Mmap(NULL, sb.st_size + 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE, STDIN_FILENO, 0); + printf("strlen: %ld\n", strlen(input)); + while ((input = consume_line(input, &db, mode))); + } + + db_close(&db); +} + +static noreturn void usage(int argc, char **argv) { + app_error("Usage: %s [-i|-q] [FILE]", argv[0]); +} + +int main(int argc, char **argv) { + op_t mode = -1; + int opt; + + while ((opt = getopt(argc, argv, "iq")) != -1) { + if (opt == 'i') + mode = DB_INSERT; + else if (opt == 'q') + mode = DB_QUERY; + else + usage(argc, argv); + } + + if (optind >= argc || mode == -1) + usage(argc, argv); + + doit(argv[optind], mode); + + return 0; +} -- cgit v1.2.3