summary refs log tree commit diff
path: root/third_party/gldc/src/aligned_vector.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/gldc/src/aligned_vector.h')
-rw-r--r--third_party/gldc/src/aligned_vector.h207
1 files changed, 207 insertions, 0 deletions
diff --git a/third_party/gldc/src/aligned_vector.h b/third_party/gldc/src/aligned_vector.h
new file mode 100644
index 0000000..7b152db
--- /dev/null
+++ b/third_party/gldc/src/aligned_vector.h
@@ -0,0 +1,207 @@
+#pragma once
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <malloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+#define AV_FORCE_INLINE static inline
+#else
+#define AV_NO_INSTRUMENT inline __attribute__((no_instrument_function))
+#define AV_INLINE_DEBUG AV_NO_INSTRUMENT __attribute__((always_inline))
+#define AV_FORCE_INLINE static AV_INLINE_DEBUG
+#endif
+
+
+#ifdef __DREAMCAST__
+#include <kos/string.h>
+
+AV_FORCE_INLINE void *AV_MEMCPY4(void *dest, const void *src, size_t len)
+{
+  if(!len)
+  {
+    return dest;
+  }
+
+  const uint8_t *s = (uint8_t *)src;
+  uint8_t *d = (uint8_t *)dest;
+
+  uint32_t diff = (uint32_t)d - (uint32_t)(s + 1); // extra offset because input gets incremented before output is calculated
+  // Underflow would be like adding a negative offset
+
+  // Can use 'd' as a scratch reg now
+  asm volatile (
+    "clrs\n" // Align for parallelism (CO) - SH4a use "stc SR, Rn" instead with a dummy Rn
+  ".align 2\n"
+  "0:\n\t"
+    "dt %[size]\n\t" // (--len) ? 0 -> T : 1 -> T (EX 1)
+    "mov.b @%[in]+, %[scratch]\n\t" // scratch = *(s++) (LS 1/2)
+    "bf.s 0b\n\t" // while(s != nexts) aka while(!T) (BR 1/2)
+    " mov.b %[scratch], @(%[offset], %[in])\n" // *(datatype_of_s*) ((char*)s + diff) = scratch, where src + diff = dest (LS 1)
+    : [in] "+&r" ((uint32_t)s), [scratch] "=&r" ((uint32_t)d), [size] "+&r" (len) // outputs
+    : [offset] "z" (diff) // inputs
+    : "t", "memory" // clobbers
+  );
+
+  return dest;
+}
+
+#else
+#define AV_MEMCPY4 memcpy
+#endif
+#define AV_ELEMENT_SIZE 32
+
+typedef struct {
+    uint32_t size;
+    uint32_t capacity;
+} __attribute__((aligned(32))) AlignedVectorHeader;
+
+typedef struct {
+    AlignedVectorHeader hdr;
+    uint8_t* data;
+} AlignedVector;
+
+#define ALIGNED_VECTOR_CHUNK_SIZE 256u
+
+
+#define ROUND_TO_CHUNK_SIZE(v) \
+    ((((v) + ALIGNED_VECTOR_CHUNK_SIZE - 1) / ALIGNED_VECTOR_CHUNK_SIZE) * ALIGNED_VECTOR_CHUNK_SIZE)
+
+
+AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint32_t index) {
+    const AlignedVectorHeader* hdr = &vector->hdr;
+    assert(index < hdr->size);
+    return vector->data + (index * AV_ELEMENT_SIZE);
+}
+
+AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) {
+    AlignedVectorHeader* hdr = &vector->hdr;
+    uint32_t original_byte_size = (hdr->size * AV_ELEMENT_SIZE);
+
+    if(element_count < hdr->capacity) {
+        return vector->data + original_byte_size;
+    }
+
+    /* We overallocate so that we don't make small allocations during push backs */
+    element_count = ROUND_TO_CHUNK_SIZE(element_count);
+
+    uint32_t new_byte_size = (element_count * AV_ELEMENT_SIZE);
+    uint8_t* original_data = vector->data;
+
+    vector->data = (uint8_t*) memalign(0x20, new_byte_size);
+    assert(vector->data);
+
+    AV_MEMCPY4(vector->data, original_data, original_byte_size);
+    free(original_data);
+
+    hdr->capacity = element_count;
+    return vector->data + original_byte_size;
+}
+
+AV_FORCE_INLINE AlignedVectorHeader* aligned_vector_header(const AlignedVector* vector) {
+    return (AlignedVectorHeader*) &vector->hdr;
+}
+
+AV_FORCE_INLINE uint32_t aligned_vector_size(const AlignedVector* vector) {
+    const AlignedVectorHeader* hdr = &vector->hdr;
+    return hdr->size;
+}
+
+AV_FORCE_INLINE uint32_t aligned_vector_capacity(const AlignedVector* vector) {
+    const AlignedVectorHeader* hdr = &vector->hdr;
+    return hdr->capacity;
+}
+
+AV_FORCE_INLINE void* aligned_vector_front(const AlignedVector* vector) {
+    return vector->data;
+}
+
+#define av_assert(x) \
+    do {\
+        if(!(x)) {\
+            fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
+            exit(1);\
+        }\
+    } while(0); \
+
+/* Resizes the array and returns a pointer to the first new element (if upsizing) or NULL (if downsizing) */
+AV_FORCE_INLINE void* aligned_vector_resize(AlignedVector* vector, const uint32_t element_count) {
+    void* ret = NULL;
+
+    AlignedVectorHeader* hdr = &vector->hdr;
+    uint32_t previous_count = hdr->size;
+    if(hdr->capacity <= element_count) {
+        /* If we didn't have capacity, increase capacity (slow) */
+
+        aligned_vector_reserve(vector, element_count);
+        hdr->size = element_count;
+
+        ret = aligned_vector_at(vector, previous_count);
+
+        av_assert(hdr->size == element_count);
+        av_assert(hdr->size <= hdr->capacity);
+    } else if(previous_count < element_count) {
+        /* So we grew, but had the capacity, just get a pointer to
+         * where we were */
+        hdr->size = element_count;
+        av_assert(hdr->size < hdr->capacity);
+        ret = aligned_vector_at(vector, previous_count);
+    } else if(hdr->size != element_count) {
+        hdr->size = element_count;
+        av_assert(hdr->size < hdr->capacity);
+    }
+
+    return ret;
+}
+
+AV_FORCE_INLINE void* aligned_vector_push_back(AlignedVector* vector, const void* objs, uint32_t count) {
+    /* Resize enough room */
+    AlignedVectorHeader* hdr = &vector->hdr;
+
+    assert(count);
+#ifndef NDEBUG
+    uint32_t initial_size = hdr->size;
+#endif
+
+    uint8_t* dest = (uint8_t*) aligned_vector_resize(vector, hdr->size + count);
+    assert(dest);
+
+    /* Copy the objects in */
+    AV_MEMCPY4(dest, objs, count * AV_ELEMENT_SIZE);
+
+    assert(hdr->size == initial_size + count);
+    return dest;
+}
+
+
+AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_t additional_count) {
+    AlignedVectorHeader* hdr = &vector->hdr;
+    void* ret = aligned_vector_resize(vector, hdr->size + additional_count);
+    assert(ret);  // Should always return something
+    return ret;
+}
+
+AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
+    AlignedVectorHeader* hdr = &vector->hdr;
+    hdr->size = 0;
+}
+
+AV_FORCE_INLINE void aligned_vector_init(AlignedVector* vector) {
+    /* Now initialize the header*/
+    AlignedVectorHeader* const hdr = &vector->hdr;
+    hdr->size = 0;
+    hdr->capacity = 0;
+    vector->data = NULL;
+}
+
+
+#ifdef __cplusplus
+}
+#endif