aboutsummaryrefslogtreecommitdiff
path: root/semestr-5/so/lista7/so21_lista_7/hashdb.c
blob: d63c32ad888495174d1a8c8e4b99549122be634c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include "csapp.h"

#define ENT_LENGTH 48

#define DB_MODE (S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH)
#define DB_PROT (PROT_READ | PROT_WRITE)

typedef enum { DB_QUERY, DB_INSERT } op_t;
typedef char entry_t[ENT_LENGTH];

typedef struct db {
  entry_t *entry;
  ssize_t size;
  char *name;
} db_t;

static bool db_action(db_t *db, const char *str, op_t op) {
  /* Do not insert empty strings. Database doesn't store them. */
  if (*str == '\0')
    return op == DB_INSERT;
  size_t len = strnlen(str, ENT_LENGTH);
  int chain_len = max((db->size >> 14) - 1, 3);
  uint32_t chain = jenkins_hash(str, len, HASHINIT);
  for (int c = 0; c < chain_len; c++) {
    size_t idx = (chain + c) % db->size;
    if (!db->entry[idx][0] && op == DB_INSERT) {
      strncpy(db->entry[idx], str, ENT_LENGTH);
      return true;
    }
    if (strncmp(str, db->entry[idx], ENT_LENGTH) == 0)
      return true;
  }
  return false;
}

#define db_query(db, key) db_action(db, key, DB_QUERY)
#define db_maybe_insert(db, key) db_action(db, key, DB_INSERT)

/* Open (`size` = 0) or create (`size` > 0) database from `name` file. */
static void db_open(db_t *db, const char *name, size_t size) {
  assert(powerof2(size));

  int fd = Open(name, O_RDWR | O_CREAT | (size ? O_EXCL : 0), DB_MODE);

  if (size == 0) {
    struct stat sb;
    Fstat(fd, &sb);
    size = sb.st_size / sizeof(entry_t);
    if (size == 0)
      size = 1;
  }

  /* TODO: Setup DB structure, set file size and map the file into memory.
           Inform OS that we're going to read DB in random order. */
  db->size = size;
  printf("Size: %ld\n", db->size * sizeof(entry_t));
  Ftruncate(fd, db->size * sizeof(entry_t));
  db->name = name;
  db->entry = Mmap(NULL, 
                   db->size * sizeof(entry_t), 
                   DB_PROT, 
                   MAP_SHARED, 
                   fd, 
                   0);
  Madvise(db->entry, db->size * sizeof(entry_t), MADV_RANDOM);
  Close(fd);
}

/* Remove DB memory mapping and release associated memory. */
static void db_close(db_t *db) {
  Munmap(db->entry, db->size * sizeof(entry_t));
  free(db->name);
}

/* Attempt to increase size of database. */
static bool db_rehash(db_t *db, size_t new_size) {
  assert(powerof2(new_size));

  /* Create new database. */
  db_t new[1];

  char *name = alloca(strlen(db->name) + sizeof(".reshash") + 1);
  strcpy(name, db->name);
  strcat(name, ".rehash");
  db_open(new, name, new_size);

  /* Copy everything from old database to new database. */
  /* TODO: Inform OS that we're going to read DB sequentially. */
  Madvise(db->entry, db->size * sizeof(entry_t), MADV_SEQUENTIAL);
  for (size_t i = 0; i < db->size; i++) {
    if (!db_maybe_insert(new, db->entry[i])) {
      /* Oops... rehashing failed. Need to increase db size and try again. */
      /* TODO: Remove new database, since rehashing failed. */
      Unlink(name);
      Munmap(new->entry, new->size * sizeof(entry_t));
      return false;
    }
  }

  /* TODO Replace old database with new one, remove old database. */
  // truncate(db->name, new->size * sizeof(entry_t));
  // Munmap(db->entry, db->size * sizeof(entry_t));
  // printf("Going to sleep for a minute\n");
  // sleep(100);
  
  /* Why this doesn't work?????? */
  // Munmap(db->entry, db->size * sizeof(entry_t));
  // Unlink(db->name);
  // db_open(db, db->name, new->size);
  // Rename(new->name, db->name);
  // Munmap(new->entry, new->size * sizeof(entry_t));
  
  Unlink(db->name);
  Rename(new->name, db->name);
  Munmap(db->entry, db->size * sizeof(entry_t));
  
  db->entry = new->entry;
  db->size = new->size;  
  return true;
  
  return true;
}

/* Insert key into database and possibly increase DB size. */
static void db_insert(db_t *db, const char *str) {
  while (!db_maybe_insert(db, str)) {
    size_t size = db->size;
    do {
      size *= 2;
      fprintf(stderr, "db_rehash: %ld -> %ld\n", db->size, size);
    } while (!db_rehash(db, size));
  }
}

/* Read a key from buffer and perform `mode` operation of the database. */
static char *consume_line(char *buf, db_t *db, op_t mode) {
  /* Terminate string at new line character. */
  char *end = strchr(buf, '\n');
  if (end) {
    if (buf < end && end[-1] == '\r')
      end[-1] = '\0';
    *end++ = '\0';
  }

  /* Skip if line is empty. */
  if (*buf) {
    if (mode == DB_INSERT)
      db_insert(db, buf);
    else if (mode == DB_QUERY)
      puts(db_query(db, buf) ? "YES" : "NO");
  }

  /* Return pointer to next line or NULL. */
  if (end == NULL) {
    exit(EXIT_FAILURE);
  }
  return end;
}

static void doit(const char *path, op_t mode) {
  db_t db;
  db_open(&db, path, 0);

  /* If input file is a terminal device then use standard reading technique. */
  /* TODO: Use fstat instead to handle pipes correctly. */
  struct stat sb;
  Fstat(STDIN_FILENO, &sb);

  if (S_ISCHR(sb.st_mode) || S_ISFIFO(sb.st_mode)) {
    char buf[ENT_LENGTH + 1];
    while (fgets(buf, ENT_LENGTH + 1, stdin))
      consume_line(buf, &db, mode);
  } else {
    /* TODO: Map stdin into memory, and read it line by line. */
    char *input = Mmap(NULL, sb.st_size + 1, PROT_READ | PROT_WRITE, 
                       MAP_PRIVATE, STDIN_FILENO, 0);
    printf("strlen: %ld\n", strlen(input));
    while ((input = consume_line(input, &db, mode)));
  }

  db_close(&db);
}

static noreturn void usage(int argc, char **argv) {
  app_error("Usage: %s [-i|-q] [FILE]", argv[0]);
}

int main(int argc, char **argv) {
  op_t mode = -1;
  int opt;

  while ((opt = getopt(argc, argv, "iq")) != -1) {
    if (opt == 'i')
      mode = DB_INSERT;
    else if (opt == 'q')
      mode = DB_QUERY;
    else
      usage(argc, argv);
  }

  if (optind >= argc || mode == -1)
    usage(argc, argv);

  doit(argv[optind], mode);

  return 0;
}