Cleaned up a lot of code, but sadly no real speed-up yet. I plan on debugging and fine-tuning it later, though.

bench
Anton Lydike 3 years ago
parent 693f00e113
commit f420c39a4a

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
</project>

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="CPP_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

@ -1,8 +1,8 @@
OPTIMIZATION=-O3
OPTIMIZATION=-O3 -flto
CC=gcc
CFLAGS=-Isrc/ -lm -Wall -Wextra -pedantic-errors $(OPTIMIZATION)
CFLAGS=-Isrc/ -lm -lpthread -Wall -Wextra -pedantic-errors $(OPTIMIZATION)
.PHONY: directories
@ -21,7 +21,6 @@ obj/camera.o: src/camera.c src/camera.h
obj/images.o: images/src/images.c images/src/images.h
$(CC) $(CFLAGS) -c -o $@ images/src/images.c
march: obj/camera.o obj/scene.o obj/point.o obj/images.o
$(CC) $(CFLAGS) -o out/march $^ marcher.c

@ -11,10 +11,6 @@
#include "src/point.h"
typedef int bool;
#define true 1
#define false 0
#define BENCH_VERSION "1.0"
@ -28,12 +24,12 @@ typedef int bool;
*/
double mandelbulb_dist(struct point pt, struct scene_object *self) {
int iters = self->args[0];
int iters = (int) self->args[0];
double power = self->args[1];
struct point z = pt;
float dr = 1.0;
float r = 0.0;
double dr = 1.0;
double r = 0.0;
for (int i = 0; i < iters ; i++) {
r = pt_length(z);
@ -42,12 +38,12 @@ double mandelbulb_dist(struct point pt, struct scene_object *self) {
}
// convert to polar coordinates
float theta = acos(z.z/r);
float phi = atan2(z.y,z.x);
double theta = acos(z.z/r);
double phi = atan2(z.y,z.x);
dr = pow(r, power-1.0)*power*dr + 1.0;
// scale and rotate the point
float zr = pow(r, power);
double zr = pow(r, power);
theta = theta*power;
phi = phi*power;
@ -81,11 +77,11 @@ struct scene_object mandelbulb_new(struct point location, int iters, double powe
return so;
}
int run_bench(int size, float pow, int threads, const char path[], bool save) {
float cam_position = 1.15;
int run_bench(int size, double pow, int threads, const char path[], int save) {
double cam_position = 1.15;
int steps = 2000;
int iters = 1000;
float threshold = 0.0001;
double threshold = 0.0001;
struct camera cam;
cam.fov = 90;
@ -107,7 +103,7 @@ int run_bench(int size, float pow, int threads, const char path[], bool save) {
image_save_bmp(*img, path);
}
image_destroy_shared(*img);
image_destroy(*img);
scene_destroy(scene);
return 0;
@ -134,11 +130,10 @@ void timer_print(struct timer t) {
printf("\nBenchmark %s took %ldms (%.2fs)\n", t.name, time, time / 1000.0f);
}
int main(int argc, char *argv[])
int main()
{
int threads = get_nprocs();
int threads = get_nprocs() / 2;
struct timer bench;
int size = 1080;
printf("Mandelbulb Benchmark v%s\n\nDetected %d threads...\n", BENCH_VERSION, threads);
@ -146,7 +141,7 @@ int main(int argc, char *argv[])
bench.name = "1080px render with saving";
timer_start(&bench);
run_bench(1080, 3.0, threads, "bench-pow3-1080p.bmp", true);
run_bench(1080, 3.0, threads, "bench-pow3-1080p.bmp", 1);
timer_end(&bench);
timer_print(bench);
@ -154,7 +149,7 @@ int main(int argc, char *argv[])
bench.name = "1080px render without saving";
timer_start(&bench);
run_bench(1080, 3.0, threads, "", false);
run_bench(1080, 3.0, threads, "", 0);
timer_end(&bench);
timer_print(bench);
@ -162,7 +157,7 @@ int main(int argc, char *argv[])
bench.name = "10 megapixel render with saving";
timer_start(&bench);
run_bench(3162, 3.0, threads, "bench-pow3-10mpx.bmp", true);
run_bench(3162, 3.0, threads, "bench-pow3-10mpx.bmp", 1);
timer_end(&bench);
timer_print(bench);
@ -170,7 +165,7 @@ int main(int argc, char *argv[])
bench.name = "40 megapixel render with saving";
timer_start(&bench);
run_bench(6324, 3.0, threads, "bench-pow3-40mpx.bmp", true);
run_bench(6324, 3.0, threads, "bench-pow3-40mpx.bmp", 1);
timer_end(&bench);
timer_print(bench);
@ -178,7 +173,7 @@ int main(int argc, char *argv[])
bench.name = "1080px render single threaded without saving";
timer_start(&bench);
run_bench(1080, 3.0, 1, "", false);
run_bench(1080, 3.0, 1, "", 0);
timer_end(&bench);
timer_print(bench);

@ -160,7 +160,7 @@ int main(int argc, char* argv[]) {
image_save_bmp(*img, path);
image_destroy_shared(*img);
image_destroy(*img);
scene_destroy(scene);
return 0;

@ -1,11 +1,25 @@
#include <unistd.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include "point.h"
#include "camera.h"
// Work description handed to one camera_iterate_rays_const_dist_thread worker.
// Each worker receives its own instance and updates `start` while it runs.
struct thread_args {
// point the row walk begins at; the worker shifts it by move_up once per row
struct point start;
// index of this worker; it renders only rows y with y % thread_count == thread_id
int thread_id;
// total number of workers the rows are distributed over
int thread_count;
// number of rows (y values) to walk
int height;
// number of pixels (x values) per row
int width;
// offset added to `start` to step from one row to the next
struct point move_up;
// offset added to the current point to step one pixel to the right
struct point move_right;
// invoked once per rendered pixel as callback(point, x, y)
void (*callback)(struct point, int, int);
};
static void * camera_iterate_rays_const_dist_thread(void* args);
struct camera camera_new(struct point direction, unsigned int fov) {
struct camera camera;
camera.location = (struct point) {
@ -26,10 +40,9 @@ void camera_set_looking_at(struct camera *cam, struct point origin, struct point
cam->direction = pt_normalize(pt_sub(thing, origin));
}
void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int threads, void (*callback)(struct point, int, int)) {
// negative threads => single threaded.
if (threads < 0) threads = 0;
void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int thread_count, void (*callback)(struct point, int, int)) {
// negative thread_count => single threaded.
if (thread_count < 0) thread_count = 0;
struct point span_z, span_xy;
@ -52,46 +65,88 @@ void camera_iterate_rays_const_dist(struct camera camera, int width, int height,
starting_point = pt_add(starting_point, pt_mult(move_right, - width / (double) 2));
starting_point = pt_add(starting_point, pt_mult(move_up, - height / (double) 2));
// initialize threads
int thread_id = 0;
for (int i = 0; i < threads - 1; i++) {
if (fork() == 0) {
thread_id = i + 1;
break;
if (thread_count < 2) {
//TODO implement single threaded work here
struct thread_args arg = {
.start = starting_point,
.thread_id = 0,
.thread_count = 1,
.height = height,
.width = width,
.move_up = move_up,
.move_right = move_right,
.callback = callback
};
camera_iterate_rays_const_dist_thread(&arg);
return;
}
// initialize thread_count
pthread_t * threads = malloc(sizeof(pthread_t) * thread_count);
struct thread_args* args = malloc(sizeof(struct thread_args) * thread_count);
for (int i = 0; i < thread_count; i++) {
args[i] = (struct thread_args) {
.start = starting_point,
.thread_id = i,
.thread_count = thread_count,
.height = height,
.width = width,
.move_up = move_up,
.move_right = move_right,
.callback = callback
};
pthread_create(threads + i, NULL, camera_iterate_rays_const_dist_thread, (void*) (args + i));
}
//struct thread_args {
// struct point start;
// int thread_id;
// int thread_count;
// int height;
// int width;
// struct point move_up;
// struct point move_right;
// void (*callback)(struct point, int, int);
//};
for (int i = 0; i < thread_count; i++) {
pthread_join(threads[i], NULL);
}
}
static void * camera_iterate_rays_const_dist_thread(void* arg_ptr) {
// explicit cast to make gcc happy
struct thread_args* args = (struct thread_args*) arg_ptr;
// this point is moved for every pixel
struct point curr_pt = starting_point;
struct point curr_pt = args->start;
// (0,0) screenspace is bottom left corner
for (int y = 0; y < height; y++) {
for (int y = 0; y < args->height; y++) {
// move one row up (this has to be done in every thread!)
starting_point = pt_add(starting_point, move_up);
args->start = pt_add(args->start, args->move_up);
// only render the lines this thread is responsible for
if (y % threads != thread_id) continue;
if (y % args->thread_count != args->thread_id) continue;
// display progress in percent
if (height > 200 && y % (height / 100) == 0 && y != 0) {
printf("\r%02i%%", (y * 100) / height);
if (args->height > 200 && y % (args->height / 100) == 0 && y != 0) {
printf("\r%02i%%", (y * 100) / args->height);
fflush(stdout);
}
// actually iterate this line
curr_pt = starting_point;
for (int x = 0; x < width; x++) {
callback(curr_pt, x, y);
curr_pt = pt_add(curr_pt, move_right); // move pt right to next pt
curr_pt = args->start;
for (int x = 0; x < args->width; x++) {
args->callback(curr_pt, x, y);
curr_pt = pt_add(curr_pt, args->move_right); // move pt right to next pt
}
}
if (thread_id != 0) {
exit(0);
}
int status;
for (int i = 0; i < threads - 1; i++) {
while(wait(&status) > 0) {}
}
return NULL;
}

@ -8,4 +8,4 @@ struct camera {
struct camera camera_new(struct point direction, unsigned int fov);
void camera_set_looking_at(struct camera *cam, struct point origin, struct point thing);
void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int threads, void (*callback)(struct point, int, int));
void camera_iterate_rays_const_dist(struct camera camera, int width, int height, int thread_count, void (*callback)(struct point, int, int));

@ -2,17 +2,10 @@
#include <stdio.h>
#include "point.h"
// scale vector to length
struct point pt_scale(struct point pt, double length) {
double f = length / pt_length(pt);
return (struct point) {
.x = pt.x * f,
.y = pt.y * f,
.z = pt.z * f
};
}
// get the length of vector
inline double pt_length_inline (struct point pt) __attribute__((always_inline));
struct point pt_mult(struct point pt, double scalar) {
inline struct point pt_mult(struct point pt, double scalar) {
return (struct point) {
.x = pt.x * scalar,
.y = pt.y * scalar,
@ -21,7 +14,7 @@ struct point pt_mult(struct point pt, double scalar) {
}
// return internal angle between a and b
double pt_angle(struct point a, struct point b) {
inline double pt_angle(struct point a, struct point b) {
return acos(pt_dot(
pt_normalize(a),
pt_normalize(b)
@ -29,12 +22,16 @@ double pt_angle(struct point a, struct point b) {
}
// get the length of vector
double pt_length(struct point pt) {
// Euclidean norm of pt: square root of the sum of its squared components.
inline double pt_length_inline (struct point pt) {
    const double squared = (pt.x * pt.x) + (pt.y * pt.y) + (pt.z * pt.z);
    return sqrt(squared);
}
// Non-inline entry point for the vector length; forwards to pt_length_inline.
double pt_length(struct point pt) {
    const double len = pt_length_inline(pt);
    return len;
}
// add the vector add to the vector pt
struct point pt_add(struct point pt, struct point add) {
inline struct point pt_add(struct point pt, struct point add) {
return (struct point) {
.x = pt.x + add.x,
.y = pt.y + add.y,
@ -43,7 +40,7 @@ struct point pt_add(struct point pt, struct point add) {
}
// add the vector add to the vector pt
struct point pt_sub(struct point pt, struct point sub) {
inline struct point pt_sub(struct point pt, struct point sub) {
return (struct point) {
.x = pt.x - sub.x,
.y = pt.y - sub.y,
@ -51,22 +48,38 @@ struct point pt_sub(struct point pt, struct point sub) {
};
}
double pt_dist(struct point p1, struct point p2) {
return pt_length(pt_sub(p1, p2));
inline double pt_dist(struct point p1, struct point p2) {
return pt_length_inline(pt_sub(p1, p2));
}
// normalize a vector
struct point pt_normalize(struct point pt) {
return pt_scale(pt, 1);
// Return pt with every component divided by pt's length, i.e. the same
// direction at unit length. A zero-length pt is not special-cased, so the
// division by zero is left to the floating-point arithmetic.
inline struct point pt_normalize(struct point pt) {
    const double norm = pt_length(pt);

    // pt is a by-value copy, so it can be rescaled in place and returned
    pt.x /= norm;
    pt.y /= norm;
    pt.z /= norm;
    return pt;
}
// scale vector to length
// Rescale pt so that its length becomes `length`, keeping its direction.
// A zero-length pt is not special-cased.
inline struct point pt_scale(struct point pt, double length) {
    const double factor = length / pt_length_inline(pt);

    // pt is a by-value copy, so it can be rescaled in place and returned
    pt.x *= factor;
    pt.y *= factor;
    pt.z *= factor;
    return pt;
}
// dot product of two vectors
double pt_dot(struct point a, struct point b) {
// Dot product of a and b, summed in x, y, z order.
inline double pt_dot(struct point a, struct point b) {
    double sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}
// cross product of two vectors
struct point pt_cross(struct point a, struct point b) {
inline struct point pt_cross(struct point a, struct point b) {
return (struct point) {
.x = a.y*b.z - a.z*b.y,
.y = a.z*b.x - a.x*b.z,
@ -74,29 +87,29 @@ struct point pt_cross(struct point a, struct point b) {
};
}
void pt_print(struct point pt) {
// Write pt to stdout as "(x, y, z)" followed by a newline.
inline void pt_print(struct point pt) {
    const double x = pt.x;
    const double y = pt.y;
    const double z = pt.z;
    printf("(%f, %f, %f)\n", x, y, z);
}
void pt_print_n(const char* name, struct point pt) {
// Write pt to stdout as "name: (x, y, z)" followed by a newline.
// `name` is passed straight to %s, so it must be a valid C string.
inline void pt_print_n(const char* name, struct point pt) {
    const double x = pt.x;
    const double y = pt.y;
    const double z = pt.z;
    printf("%s: (%f, %f, %f)\n", name, x, y, z);
}
// find two vectors that span the orthogonal plane, where
// span_xy is a vector lying on the xy-plane (and pointing left)
// and span_z is orthogonal to span_xy pointing "upwards"
void pt_orthogonal_plane(struct point pt, struct point *span_z, struct point *span_xy) {
inline void pt_orthogonal_plane(struct point pt, struct point *span_z, struct point *span_xy) {
pt = pt_normalize(pt);
// get the vector lying on the xy axis
// this is done by
*span_xy = pt_normalize(pt_cross((struct point){.x = 0, .y = 0, .z = 1}, pt)); // points to the "left" (of the viewing direction)
*span_xy = pt_normalize(pt_cross(PT_NEW(0,0,1), pt)); // points to the "left" (of the viewing direction)
// now use this, to find the vector
*span_z = pt_normalize(pt_cross(pt, *span_xy));
}
struct point pt_mod(struct point pt, double mod) {
inline struct point pt_mod(struct point pt, double mod) {
return (struct point) {
.x = fabs(fmod(pt.x, mod)),
.y = fabs(fmod(pt.y, mod)),

@ -8,6 +8,7 @@ struct point {
};
#define PT_ZERO ((struct point) {.x=0, .y=0, .z=0})
// Build a struct point compound literal from three component expressions.
// Each argument is parenthesized and expanded exactly once. The initializer is
// positional, so it relies on the member order of struct point
// (presumably x, y, z — confirm against the struct definition).
#define PT_NEW(x,y,z) ((struct point){(x), (y), (z)})
struct point pt_scale(struct point pt, double length);
struct point pt_normalize(struct point pt);

@ -51,7 +51,7 @@ Image* render_scene(struct scene *scene, struct camera *camera, unsigned int thr
current_camera= camera;
// initialize shared pixel buffer
image_new_shared(scene->width, scene->height, current_image);
image_new(scene->width, scene->height, current_image);
// iterate over the rays
camera_iterate_rays_const_dist(*camera, scene->width, scene->height, threads, camera_iter_callback);

Loading…
Cancel
Save