Jump to content

Hash Table in C.

Gat Pelsinger

Working on a hash table in C.  Right now it doesn't work. It prints 25 and then segfaults. Probably the linked list is not being linked correctly. I hope somebody takes a look.

 

HashTable.h - 

 

#ifndef HashTable_H
#define HashTable_H

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>

#define TABLE_SIZE 99
struct KeyValue {
    char* key;
    struct ValueList* value;
};

struct ValueList {
    char* value;
    struct ValueList* next;
};

struct HashTable {
    struct KeyValue* table[TABLE_SIZE];
};

size_t hash(const char* key);
struct HashTable* createHashTable();
void insert(struct HashTable* table, const char* key, size_t valueNum, char* value, ...);
struct ValueList* search(struct HashTable* table, const char* key);
void freeTable(struct HashTable* table);

#endif

 

HashTable.c - 

 

#include "HashTable.h"

size_t hash(const char* key){
    size_t hash = 0;
    for (size_t i = 0; i < TABLE_SIZE; i++) {
        hash = 69 * hash + key[i];
    }
    return hash % TABLE_SIZE;
}

struct HashTable* createHashTable(){
    struct HashTable* table = (struct HashTable*)malloc(sizeof(struct HashTable));
    for(size_t i = 0; i < TABLE_SIZE; i++){
        table->table[i] = NULL;
    }
    return table;
}

void insert(struct HashTable* table, const char* key, size_t valueNum, char* value, ...){
    size_t index = hash(key); //use the hash as index.
    //setting up pair and vlist to allocate the keys and values.
    //Key is only 1 string, but values are a linked list.
    struct KeyValue* pair = (struct KeyValue*)malloc(sizeof(struct KeyValue));
    if (pair == NULL){
        fprintf(stderr, "HeapAllocationFailedException");
        exit(1);
    }
    pair->key = strdup(key); //store the key
    struct ValueList* vlist = (struct ValueList*)malloc(sizeof(struct ValueList));
    if (vlist == NULL){
        fprintf(stderr, "HeapAllocationFailedException");
        exit(1);
    }
    vlist->value = strdup(value); //store the value in top-most node
    vlist->next = NULL;
    if (valueNum < 1)
        valueNum = 1; //for safety
    if (valueNum > 1) {
        va_list args;
        va_start(args, value);
        struct ValueList* current = vlist->next; //a new pointer for pointer arithmetic
        for (size_t i = 0; i < valueNum - 1; i++) {
            current = (struct ValueList*)malloc(sizeof(struct ValueList)); //allocation for every new value node
            if (current == NULL){
                fprintf(stderr, "HeapAllocationFailedException");
                exit(1);
            }
            current->value = strdup(va_arg(args, char*)); //store the given value from args to the linked list
            current->next = NULL;
            current = current->next;
        }
    }
    pair->value = vlist; //actual linking of the linked list to the top-most node with the key
    if (table->table[index] == NULL) {
        table->table[index] = pair; //allocate the key and the value linked list
        return;
    }
    else {
        //if key is same, then we want to attach the given values in args to the end of the already allocated linked list in table.
        if (strcmp(table->table[index]->key, key) == 0) {
            vlist = table->table[index]->value; //reusing vlist as current
            while (vlist->next != NULL) {
                vlist = vlist->next;
            }
            vlist->next = pair->value;
            return;
        }
        else {
            size_t i = 0;
            while (table->table[i] != NULL && i < TABLE_SIZE) { //linear probing, in case hash function generates same hash on 2 different keys.
                i++;
            }
            if (table->table[i] != NULL) {
                fprintf(stderr, "HashTableOutofMemoryException");
                exit(1);
            }
            else 
                table->table[i] = pair;
            return;
        }
    }
}

struct ValueList* search(struct HashTable* table, const char* key){
    //TODO: This function returns the linked list of values, but I am going to make it return a string array.
    size_t index = hash(key);
    if (table->table[index] == NULL)
        return NULL;
    if (strcmp(table->table[index]->key, key) == 0)
        return table->table[index]->value;
    else {
        size_t i = 0;
        while (strcmp(table->table[i]->key, key) != 0 && i < TABLE_SIZE) { //compensating for linear probing.
            i++;
        }
        if (strcmp(table->table[i]->key, key) == 0) {
            return table->table[i]->value;
        }
        else 
            return NULL;
    }
}

void freeTable(struct HashTable* table){
    size_t i = 0;
    while (i < TABLE_SIZE){
        if (table->table[i] == NULL) {
            i++;
            continue;
        }
        free(table->table[i]->key);
        struct ValueList* current = table->table[i]->value;
        struct ValueList* temp = current;
        while (current->next != NULL) {
            current = current->next;
            free(temp);
        }
        free(current);
        i++;
    }
    free(table);
}

 

main.c - 

 

#include "HashTable.h"
#include <stdio.h>

int main(int argc, char** argv) {
    struct HashTable* table = createHashTable();
    insert(table, "John", 3, "25", "6", "312");
    insert(table, "Joseph", 3, "23", "5.4", "214");
    insert(table, "Kyle", 3, "28", "5.8", "219");
    printf("%s", (search(table, "John")->value));
    printf("%s", (search(table, "John")->next->value));
    printf("%s", (search(table, "John")->next->next->value));
    return 0;
}

 

It prints 25 and then a segfault occurs.

 

Edit - Also, should I really use strcpy instead of strdup? strdup passes a pointer to the string but with strcpy I directly get the string.

 

Microsoft owns my soul.

 

Also, Dell is evil, but HP kinda nice.

Link to comment
Share on other sites

Link to post
Share on other sites

I haven't messed with C++ in many years, but I think I see a couple issues in your insert() function and how you're handling your va_args.

 

First, the method signature looks incorrect to me

// void insert(struct HashTable* table, const char* key, size_t valueNum, char* value, ...);
void insert(struct HashTable* table, const char* key, size_t valueNum, char* values...); // va_args will be a list of char*

 

Then, in your loop, you're not accessing va_arg correctly

// current->value = strdup(va_arg(args, char*)); //store the given value from args to the linked list
current->value = strdup(va_arg(args, i+1)); // get the (i+1)th value from the args list (you've read in the 0th value already before the loop

 

Link to comment
Share on other sites

Link to post
Share on other sites

@QuantumRand

 

1) The method signature is actually correct, you have copied it wrong.

 

2) I cannot do i + 1. That parameter requires a type as a parameter, which will be the type of the next argument of the function. I was told that whenever I call va_arg, it jumps to the next argument.

 

EDIT - @QuantumRand One problem I already noticed is that in my hash function, by i would iterate till it reaches Table_size, which is totally false, it should iterate till it reaches the '/0' meaning at the end of the string key. But even after this change there is no change in output. It still successfully allocates and retrieves the first value, 25, and if my has function wouldn't work, nothing would work, so, how was it working before the fix? shouldn't key (out of bounds) give me segfault?

Microsoft owns my soul.

 

Also, Dell is evil, but HP kinda nice.

Link to comment
Share on other sites

Link to post
Share on other sites

46 minutes ago, Gat Pelsinger said:

@QuantumRand

 

1) The method signature is actually correct, you have copied it wrong.

 

2) I cannot do i + 1. That parameter requires a type as a parameter, which will be the type of the next argument of the function. I was told that whenever I call va_arg, it jumps to the next argument.

My mistake, you're correct, although my syntax for the signature will also work. The comma is optional and was added for backward compatibility with C.

 

I think I see your issue now though. You're breaking your link at the end of your loop when you set current = current->next (which is NULL).

// original code
struct ValueList* current = vlist->next; //a new pointer for pointer arithmetic
for (size_t i = 0; i < valueNum - 1; i++) {
  current = (struct ValueList*)malloc(sizeof(struct ValueList)); //allocation for every new value node
  if (current == NULL){
    fprintf(stderr, "HeapAllocationFailedException");
    exit(1);
  }
  current->value = strdup(va_arg(args, char*)); //store the given value from args to the linked list
  current->next = NULL;
  current = current->next;
}

// fixed maybe?
struct ValueList* current = *vlist; //a new pointer for pointer arithmetic
for (size_t i = 0; i < valueNum - 1; i++) {
  current->next = (struct ValueList*)malloc(sizeof(struct ValueList)); //allocation for every new value node
  current = current->next;
  if (current == NULL){
    fprintf(stderr, "HeapAllocationFailedException");
    exit(1);
  }
  current->value = strdup(va_arg(args, char*)); //store the given value from args to the linked list
  current->next = NULL;
}

 

 

Link to comment
Share on other sites

Link to post
Share on other sites

Might want to do a bit more debugging/tracing on your own to find out where the problem lies and ask about that snippet if you're confused instead of dumping a bunch of code it'd take someone half an hour to make sense of, let alone try to debug.

 

That said this jumps out:

 

struct HashTable* createHashTable(){
    struct HashTable* table = (struct HashTable*)malloc(sizeof(struct HashTable));
    for(size_t i = 0; i < TABLE_SIZE; i++){
        table->table[i] = NULL;
    }
    return table;
}

you iterate over TABLE_SIZE which is 99 yet only allocated memory for one item, that'll segfault alright.

 

F@H
Desktop: i9-13900K, ASUS Z790-E, 64GB DDR5-6000 CL36, RTX3080, 2TB MP600 Pro XT, 2TB SX8200Pro, 2x16TB Ironwolf RAID0, Corsair HX1200, Antec Vortex 360 AIO, Thermaltake Versa H25 TG, Samsung 4K curved 49" TV, 23" secondary, Mountain Everest Max

Mobile SFF rig: i9-9900K, Noctua NH-L9i, Asrock Z390 Phantom ITX-AC, 32GB, GTX1070, 2x1TB SX8200Pro RAID0, 2x5TB 2.5" HDD RAID0, Athena 500W Flex (Noctua fan), Custom 4.7l 3D printed case

 

Asus Zenbook UM325UA, Ryzen 7 5700u, 16GB, 1TB, OLED

 

GPD Win 2

Link to comment
Share on other sites

Link to post
Share on other sites

@QuantumRand

 

Ah yes, you didn't quite solve the problem but you at least pointed out to the correct part of the code which had the problem, and looks like I did solve it. Instead of setting current to vlist->next, I just do current = vlist, and do all my calculations through current->next as I will have a base pointer which is not NULL and is connected in the linked list. Later after mallocing for current->next, I can set current = current->next to directly go through current instead of current -> next.

 

And as for @Kilrah, no, my HashTable is an array pointer, not just a pointer. Because I have set it to an array and set a size (table_size), when I malloc for HashTable, it also mallocs for all the arrays. I can confirm by hovering over the sizeof operator which shows 792 which is obviously a lot more than just one index of the array.

Microsoft owns my soul.

 

Also, Dell is evil, but HP kinda nice.

Link to comment
Share on other sites

Link to post
Share on other sites

44 minutes ago, Gat Pelsinger said:

Ah yes, you didn't quite solve the problem but you at least pointed out to the correct part of the code which had the problem, and looks like I did solve it. Instead of setting current to vlist->next, I just do current = vlist, and do all my calculations through current->next as I will have a base pointer which is not NULL and is connected in the linked list. Later after mallocing for current->next, I can set current = current->next to directly go through current instead of current -> next.

It's been a long time since I've had to think in pointers, so I had to edit my code snippet a few times. I think it's correct now.

Link to comment
Share on other sites

Link to post
Share on other sites

39 minutes ago, Gat Pelsinger said:

And as for @Kilrah, no, my HashTable is an array pointer, not just a pointer. Because I have set it to an array and set a size (table_size), when I malloc for HashTable, it also mallocs for all the arrays. I can confirm by hovering over the sizeof operator which shows 792 which is obviously a lot more than just one index of the array.

Ah yes there's an array in the struct. Oof. 

F@H
Desktop: i9-13900K, ASUS Z790-E, 64GB DDR5-6000 CL36, RTX3080, 2TB MP600 Pro XT, 2TB SX8200Pro, 2x16TB Ironwolf RAID0, Corsair HX1200, Antec Vortex 360 AIO, Thermaltake Versa H25 TG, Samsung 4K curved 49" TV, 23" secondary, Mountain Everest Max

Mobile SFF rig: i9-9900K, Noctua NH-L9i, Asrock Z390 Phantom ITX-AC, 32GB, GTX1070, 2x1TB SX8200Pro RAID0, 2x5TB 2.5" HDD RAID0, Athena 500W Flex (Noctua fan), Custom 4.7l 3D printed case

 

Asus Zenbook UM325UA, Ryzen 7 5700u, 16GB, 1TB, OLED

 

GPD Win 2

Link to comment
Share on other sites

Link to post
Share on other sites

I think I see a potential problem when you're constructing your linked list, here
 

        struct ValueList* current = vlist->next; //a new pointer for pointer arithmetic
        for (size_t i = 0; i < valueNum - 1; i++) {
            current = (struct ValueList*)malloc(sizeof(struct ValueList)); //allocation for every new value node
            if (current == NULL){
                fprintf(stderr, "HeapAllocationFailedException");
                exit(1);
            }
            current->value = strdup(va_arg(args, char*)); //store the given value from args to the linked list
            current->next = NULL;
            current = current->next;
        }
    

Your list isn't actually connected. Your list head starts as vlist, with a next pointer of NULL. You then start looping over the varargs, creating new list nodes for each argument. The problem is that you never attach these back to vlist, which retains a NULL value for its ->next pointer. I presume then that when you try to traverse this list, you dereference a NULL and segfault.

Try using a second level of indirection instead, something like this,
 

        struct ValueList** current = &(vlist->next); //a new pointer for pointer arithmetic
        for (size_t i = 0; i < valueNum - 1; i++) {
            *current = (struct ValueList*)malloc(sizeof(struct ValueList)); //allocation for every new value node
            if (*current == NULL){
                fprintf(stderr, "HeapAllocationFailedException");
                exit(1);
            }
            (*current)->value = strdup(va_arg(args, char*)); //store the given value from args to the linked list
            (*current)->next = NULL;
            current = &(current->next);
        }
    

Note that current is now a ValueList**, allowing you to update the value of vlist->next when you allocate a new node. There's ways to do this without the extra indirection, but this seems to me to be the cleanest approach, and is likely what you were trying to go for.

 

Pointers are hard. I haven't actually tested that code, so there might be some minor bugs with it (again, pointers are hard), but I think the general approach should work for you.

EDIT: Also, to address this question,
 

11 hours ago, Gat Pelsinger said:

Edit - Also, should I really use strcpy instead of strdup? strdup passes a pointer to the string but with strcpy I directly get the string.

I think you have it backwards. The difference between strdup and strcpy is that strcpy requires you to allocate a buffer for the string and pass it as the second argument. strdup allocates the buffer for you and just returns it. strcpy technically gives you more control over memory management, because you allocate the buffer, but I don't see any compelling reason for you to use it over strdup in this case.

Historically, strcpy was a C standard library function and strdup was a POSIX function, meaning that strcpy was more portable. However, as of C23 strdup is officially part of the C standard library now.

Link to comment
Share on other sites

Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

×