From f420c39a4a1b0733158f0d51b7f2fe6fc456c853 Mon Sep 17 00:00:00 2001 From: Anton Lydike Date: Fri, 20 May 2022 20:42:59 +0200 Subject: [PATCH] cleaned up a lot of code, but sadly no real speed up yet. Plan on debugging and fine-tuning later tho --- .idea/misc.xml | 4 ++ .idea/raymarcher.iml | 8 --- Makefile | 5 +- bench.c | 39 +++++++------- marcher.c | 2 +- src/camera.c | 119 +++++++++++++++++++++++++++++++------------ src/camera.h | 2 +- src/point.c | 65 +++++++++++++---------- src/point.h | 1 + src/scene.c | 2 +- 10 files changed, 153 insertions(+), 94 deletions(-) create mode 100644 .idea/misc.xml delete mode 100644 .idea/raymarcher.iml diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..79b3c94 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/raymarcher.iml b/.idea/raymarcher.iml deleted file mode 100644 index bc2cd87..0000000 --- a/.idea/raymarcher.iml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/Makefile b/Makefile index 273fe2f..311d849 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ -OPTIMIZATION=-O3 +OPTIMIZATION=-O3 -flto CC=gcc -CFLAGS=-Isrc/ -lm -Wall -Wextra -pedantic-errors $(OPTIMIZATION) +CFLAGS=-Isrc/ -lm -lpthread -Wall -Wextra -pedantic-errors $(OPTIMIZATION) .PHONY: directories @@ -21,7 +21,6 @@ obj/camera.o: src/camera.c src/camera.h obj/images.o: images/src/images.c images/src/images.h $(CC) $(CFLAGS) -c -o $@ images/src/images.c - march: obj/camera.o obj/scene.o obj/point.o obj/images.o $(CC) $(CFLAGS) -o out/march $^ marcher.c diff --git a/bench.c b/bench.c index 20bc774..138e588 100644 --- a/bench.c +++ b/bench.c @@ -11,10 +11,6 @@ #include "src/point.h" -typedef int bool; -#define true 1 -#define false 0 - #define BENCH_VERSION "1.0" @@ -28,12 +24,12 @@ typedef int bool; */ double mandelbulb_dist(struct point pt, struct scene_object *self) { - int iters = self->args[0]; + int iters = (int) self->args[0]; double power = self->args[1]; struct point z = pt; - float dr = 1.0; - float r = 0.0; + double dr = 1.0; + double r = 0.0; for (int i = 0; i < iters ; i++) { r = pt_length(z); @@ -42,12 +38,12 @@ double mandelbulb_dist(struct point pt, struct scene_object *self) { } // convert to polar coordinates - float theta = acos(z.z/r); - float phi = atan2(z.y,z.x); + double theta = acos(z.z/r); + double phi = atan2(z.y,z.x); dr = pow(r, power-1.0)*power*dr + 1.0; // scale and rotate the point - float zr = pow(r, power); + double zr = pow(r, power); theta = theta*power; phi = phi*power; @@ -81,11 +77,11 @@ struct scene_object mandelbulb_new(struct point location, int iters, double powe return so; } -int run_bench(int size, float pow, int threads, const char path[], bool save) { - float cam_position = 1.15; +int run_bench(int size, double pow, int threads, const char path[], int save) { + double cam_position = 1.15; int steps = 2000; int iters = 1000; - float threshold = 0.0001; + double threshold = 0.0001; struct camera cam; cam.fov = 90; @@ -107,7 +103,7 @@ int run_bench(int size, float pow, int threads, const char path[], bool save) { image_save_bmp(*img, path); } - image_destroy_shared(*img); + image_destroy(*img); scene_destroy(scene); return 0; @@ -134,11 +130,10 @@ void timer_print(struct timer t) { printf("\nBenchmark %s took %ldms (%.2fs)\n", t.name, time, time / 1000.0f); } -int main(int argc, char *argv[]) +int main() { - int threads = get_nprocs(); + int threads = get_nprocs() / 2; struct timer bench; - int size = 1080; printf("Mandelbulb Benchmark v%s\n\nDetected %d threads...\n", BENCH_VERSION, threads); @@ -146,7 +141,7 @@ int main(int argc, char *argv[]) bench.name = "1080px render with saving"; timer_start(&bench); - run_bench(1080, 3.0, threads, "bench-pow3-1080p.bmp", true); + run_bench(1080, 3.0, threads, "bench-pow3-1080p.bmp", 1); timer_end(&bench); timer_print(bench); @@ -154,7 +149,7 @@ int main(int argc, char *argv[]) bench.name = "1080px render without saving"; timer_start(&bench); - run_bench(1080, 3.0, threads, "", false); + run_bench(1080, 3.0, threads, "", 0); timer_end(&bench); timer_print(bench); @@ -162,7 +157,7 @@ int main(int argc, char *argv[]) bench.name = "10 megapixel render with saving"; timer_start(&bench); - run_bench(3162, 3.0, threads, "bench-pow3-10mpx.bmp", true); + run_bench(3162, 3.0, threads, "bench-pow3-10mpx.bmp", 1); timer_end(&bench); timer_print(bench); @@ -170,7 +165,7 @@ int main(int argc, char *argv[]) bench.name = "40 megapixel render with saving"; timer_start(&bench); - run_bench(6324, 3.0, threads, "bench-pow3-40mpx.bmp", true); + run_bench(6324, 3.0, threads, "bench-pow3-40mpx.bmp", 1); timer_end(&bench); timer_print(bench); @@ -178,7 +173,7 @@ int main(int argc, char *argv[]) bench.name = "1080px render single threaded without saving"; timer_start(&bench); - run_bench(1080, 3.0, 1, "", false); + run_bench(1080, 3.0, 1, "", 0); timer_end(&bench); timer_print(bench); diff --git a/marcher.c b/marcher.c index c32562b..4a786ae 100644 --- a/marcher.c +++ b/marcher.c @@ -160,7 +160,7 @@ int main(int argc, char* argv[]) { image_save_bmp(*img, path); - image_destroy_shared(*img); + image_destroy(*img); scene_destroy(scene); return 0; diff --git a/src/camera.c b/src/camera.c index 54a493b..7bcfe6e 100644 --- a/src/camera.c +++ b/src/camera.c @@ -1,11 +1,25 @@ -#include #include #include #include #include +#include #include "point.h" #include "camera.h" +struct thread_args { + struct point start; + int thread_id; + int thread_count; + int height; + int width; + struct point move_up; + struct point move_right; + void (*callback)(struct point, int, int); +}; + +static void * camera_iterate_rays_const_dist_thread(void* args); + + struct camera camera_new(struct point direction, unsigned int fov) { struct camera camera; camera.location = (struct point) { @@ -26,10 +40,9 @@ void camera_set_looking_at(struct camera *cam, struct point origin, struct point cam->direction = pt_normalize(pt_sub(thing, origin)); } - -void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int threads, void (*callback)(struct point, int, int)) { - // negative threads => single threaded. - if (threads < 0) threads = 0; +void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int thread_count, void (*callback)(struct point, int, int)) { + // negative thread_count => single threaded. + if (thread_count < 0) thread_count = 0; struct point span_z, span_xy; @@ -52,46 +65,88 @@ void camera_iterate_rays_const_dist(struct camera camera, int width, int height, starting_point = pt_add(starting_point, pt_mult(move_right, - width / (double) 2)); starting_point = pt_add(starting_point, pt_mult(move_up, - height / (double) 2)); - // initialize threads - int thread_id = 0; - for (int i = 0; i < threads - 1; i++) { - if (fork() == 0) { - thread_id = i + 1; - break; - } + if (thread_count < 2) { + //TODO implement single threaded work here + struct thread_args arg = { + .start = starting_point, + .thread_id = 0, + .thread_count = 1, + .height = height, + .width = width, + .move_up = move_up, + .move_right = move_right, + .callback = callback + }; + + camera_iterate_rays_const_dist_thread(&arg); + + return; + } + + // initialize thread_count + pthread_t * threads = malloc(sizeof(pthread_t) * thread_count); + struct thread_args* args = malloc(sizeof(struct thread_args) * thread_count); + + for (int i = 0; i < thread_count; i++) { + args[i] = (struct thread_args) { + .start = starting_point, + .thread_id = i, + .thread_count = thread_count, + .height = height, + .width = width, + .move_up = move_up, + .move_right = move_right, + .callback = callback + }; + + pthread_create(threads + i, NULL, camera_iterate_rays_const_dist_thread, (void*) (args + i)); + } + + //struct thread_args { + // struct point start; + // int thread_id; + // int thread_count; + // int height; + // int width; + // struct point move_up; + // struct point move_right; + // void (*callback)(struct point, int, int); + //}; + + + for (int i = 0; i < thread_count; i++) { + pthread_join(threads[i], NULL); } +} + +static void * camera_iterate_rays_const_dist_thread(void* arg_ptr) { + // explicit cast to make gcc happy + struct thread_args* args = (struct thread_args*) arg_ptr; // this point is moved for every pixel - struct point curr_pt = starting_point; + struct point curr_pt = args->start; // (0,0) screenspace is bottom left corner - for (int y = 0; y < height; y++) { + for (int y = 0; y < args->height; y++) { // move one row up (this has to be done in every thread!) - starting_point = pt_add(starting_point, move_up); + args->start = pt_add(args->start, args->move_up); // only render the lines this thread is responsible for - if (y % threads != thread_id) continue; - + if (y % args->thread_count != args->thread_id) continue; + // display progress in percent - if (height > 200 && y % (height / 100) == 0 && y != 0) { - printf("\r%02i%%", (y * 100) / height); + if (args->height > 200 && y % (args->height / 100) == 0 && y != 0) { + printf("\r%02i%%", (y * 100) / args->height); fflush(stdout); } // actually iterate this line - curr_pt = starting_point; - for (int x = 0; x < width; x++) { - callback(curr_pt, x, y); - curr_pt = pt_add(curr_pt, move_right); // move pt right to next pt + curr_pt = args->start; + for (int x = 0; x < args->width; x++) { + args->callback(curr_pt, x, y); + curr_pt = pt_add(curr_pt, args->move_right); // move pt right to next pt } } - if (thread_id != 0) { - exit(0); - } - - int status; - for (int i = 0; i < threads - 1; i++) { - while(wait(&status) > 0) {} - } -} \ No newline at end of file + return NULL; +} diff --git a/src/camera.h b/src/camera.h index b766c3f..2fb6318 100644 --- a/src/camera.h +++ b/src/camera.h @@ -8,4 +8,4 @@ struct camera { struct camera camera_new(struct point direction, unsigned int fov); void camera_set_looking_at(struct camera *cam, struct point origin, struct point thing); -void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int threads, void (*callback)(struct point, int, int)); \ No newline at end of file +void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int thread_count, void (*callback)(struct point, int, int)); \ No newline at end of file diff --git a/src/point.c b/src/point.c index 219a70c..bb928f4 100644 --- a/src/point.c +++ b/src/point.c @@ -2,17 +2,10 @@ #include #include "point.h" -// scale vector to length -struct point pt_scale(struct point pt, double length) { - double f = length / pt_length(pt); - return (struct point) { - .x = pt.x * f, - .y = pt.y * f, - .z = pt.z * f - }; -} +// get the length of vector +inline double pt_length_inline (struct point pt) __attribute__((always_inline)); -struct point pt_mult(struct point pt, double scalar) { +inline struct point pt_mult(struct point pt, double scalar) { return (struct point) { .x = pt.x * scalar, .y = pt.y * scalar, @@ -21,7 +14,7 @@ struct point pt_mult(struct point pt, double scalar) { } // return internal angle between a and b -double pt_angle(struct point a, struct point b) { +inline double pt_angle(struct point a, struct point b) { return acos(pt_dot( pt_normalize(a), pt_normalize(b) @@ -29,12 +22,16 @@ double pt_angle(struct point a, struct point b) { } // get the length of vector -double pt_length(struct point pt) { +inline double pt_length_inline (struct point pt) { return sqrt((pt.x * pt.x) + (pt.y * pt.y) + (pt.z * pt.z)); } +double pt_length(struct point pt) { + return pt_length_inline(pt); +} + // add the vector add to the vector pt -struct point pt_add(struct point pt, struct point add) { +inline struct point pt_add(struct point pt, struct point add) { return (struct point) { .x = pt.x + add.x, .y = pt.y + add.y, @@ -43,7 +40,7 @@ struct point pt_add(struct point pt, struct point add) { } // add the vector add to the vector pt -struct point pt_sub(struct point pt, struct point sub) { +inline struct point pt_sub(struct point pt, struct point sub) { return (struct point) { .x = pt.x - sub.x, .y = pt.y - sub.y, @@ -51,22 +48,38 @@ struct point pt_sub(struct point pt, struct point sub) { }; } -double pt_dist(struct point p1, struct point p2) { - return pt_length(pt_sub(p1, p2)); +inline double pt_dist(struct point p1, struct point p2) { + return pt_length_inline(pt_sub(p1, p2)); } // normalize a vector -struct point pt_normalize(struct point pt) { - return pt_scale(pt, 1); +inline struct point pt_normalize(struct point pt) { + double length = pt_length(pt); + + return (struct point) { + .x = pt.x / length, + .y = pt.y / length, + .z = pt.z / length + }; +} + +// scale vector to length +inline struct point pt_scale(struct point pt, double length) { + double f = length / pt_length_inline(pt); + return (struct point) { + .x = pt.x * f, + .y = pt.y * f, + .z = pt.z * f + }; } // dot product of two vectors -double pt_dot(struct point a, struct point b) { +inline double pt_dot(struct point a, struct point b) { return a.x*b.x + a.y*b.y + a.z*b.z; } // cross product of two vectors -struct point pt_cross(struct point a, struct point b) { +inline struct point pt_cross(struct point a, struct point b) { return (struct point) { .x = a.y*b.z - a.z*b.y, .y = a.z*b.x - a.x*b.z, @@ -74,29 +87,29 @@ struct point pt_cross(struct point a, struct point b) { }; } -void pt_print(struct point pt) { +inline void pt_print(struct point pt) { printf("(%f, %f, %f)\n", pt.x, pt.y, pt.z); } -void pt_print_n(const char* name, struct point pt) { +inline void pt_print_n(const char* name, struct point pt) { printf("%s: (%f, %f, %f)\n", name, pt.x, pt.y, pt.z); } // find two vectors that span the orthogonal plane, where // span_xy is a vector lying on the xy-plane (and pointing left) // and span_z is orthogonal to span_xy pointing "upwards" -void pt_orthogonal_plane(struct point pt, struct point *span_z, struct point *span_xy) { +inline void pt_orthogonal_plane(struct point pt, struct point *span_z, struct point *span_xy) { pt = pt_normalize(pt); // get the vector lying on the xy axis - // this is done by - *span_xy = pt_normalize(pt_cross((struct point){.x = 0, .y = 0, .z = 1}, pt)); // points to the "left" (of the viewing direction) + // this is done by + *span_xy = pt_normalize(pt_cross(PT_NEW(0,0,1), pt)); // points to the "left" (of the viewing direction) // now use this, to find the vector *span_z = pt_normalize(pt_cross(pt, *span_xy)); } -struct point pt_mod(struct point pt, double mod) { +inline struct point pt_mod(struct point pt, double mod) { return (struct point) { .x = fabs(fmod(pt.x, mod)), .y = fabs(fmod(pt.y, mod)), diff --git a/src/point.h b/src/point.h index 69cb378..62bde2d 100644 --- a/src/point.h +++ b/src/point.h @@ -8,6 +8,7 @@ struct point { }; #define PT_ZERO ((struct point) {.x=0, .y=0, .z=0}) +#define PT_NEW(x,y,z) ((struct point){(x), (y), (z)}) struct point pt_scale(struct point pt, double length); struct point pt_normalize(struct point pt); diff --git a/src/scene.c b/src/scene.c index bad4222..b62fab6 100644 --- a/src/scene.c +++ b/src/scene.c @@ -51,7 +51,7 @@ Image* render_scene(struct scene *scene, struct camera *camera, unsigned int thr current_camera= camera; // initialize shared pixel buffer - image_new_shared(scene->width, scene->height, current_image); + image_new(scene->width, scene->height, current_image); // iterate over the rays camera_iterate_rays_const_dist(*camera, scene->width, scene->height, threads, camera_iter_callback);