/** * @file "tree.c" * @purpose Implementation of Barnes Hut algorithms * @author Sam Moore (20503628) 2012 */ #include "tree.h" #include "math.h" #include #include "graphics.h" #ifdef USE_THREADS unsigned nested_used = 0; #endif //USE_THREADS /** * @function Node_SetSpace * @purpose Set the position and space occupied by a node * @param node - The Node * @param x - The vertex with the smallest coordinate values * @param size - The dimensions of the cube that the Node represents */ void Node_SetSpace(Node * node, float x[DIMENSIONS], float size[DIMENSIONS]) { for (unsigned i = 0; i < DIMENSIONS; ++i) { node->x[i] = x[i]; node->size[i] = size[i]; } } /** * @function Node_Destroy * @purpose Confuse Java programmers * *ahem* Cleanup the memory used by the Node and its children * @param node - The Node */ void Node_Destroy(Node * node) { if (node == NULL) return; //printf("Destroy node %p\n", (void*)node); if (node->body != NULL) { free(node->body); node->body = NULL; } node->allocated = 0; node->N = 0; #ifdef USE_THREADS Prepare_Threads(SUB_DIVISIONS); #pragma omp parallel for #endif //USE_THREADS for (unsigned i = 0; i < SUB_DIVISIONS; ++i) { Node_Destroy(node->children[i]); free(node->children[i]); node->children[i] = NULL; } } /** * @function Node_FindBodies * @purpose Search for bodies in a system within a Node, and add them to the Node's array of bodies. * @param n The Node * @param s The System * @returns The total number of bodies found from s within n */ unsigned Node_FindBodies(Node * n, System * s) { if (n->body != NULL) { // reset the body array first #ifdef USE_THREADS //Prepare_Threads(options.nested_threads); //#pragma omp parallel for #endif //USE_THREADS for (unsigned i = 0; i < n->allocated; ++i) n->body[i] = NULL; } n->N = 0; // Don't forget this! if (n->parent == NULL) { #ifdef USE_THREADS //Prepare_Threads(options.nested_threads); //#pragma omp parallel for #endif //USE_THREADS for (unsigned a = 0; a < s->N; ++a) { if (Node_ContainsBody(n, s->body+a)) Node_AddBody(n, s->body+a); } } else { #ifdef USE_THREADS //Prepare_Threads(options.nested_threads); //#pragma omp parallel for #endif //USE_THREADS for (unsigned a = 0; a < n->parent->allocated; ++a) { if (Node_ContainsBody(n, n->parent->body[a])) Node_AddBody(n, n->parent->body[a]); } } //printf("Node %p contains %d bodies\n", (void*)(n), n->N); return n->N; } /** * @function Node_Subdivide * @purpose Subdivide a Node into more nodes and fill them with bodies * @param node - The Node * @param s - The system which the bodies are from * @returns The number of bodies contained in all children of node after this is finished */ unsigned Node_Subdivide(Node * node, System * s) { if (Node_FindBodies(node, s) <= 1) // First find all bodies in the node return node->N; // If there was one or less, no need to subdivide unsigned count = 0; // stores the return value of the function float size[DIMENSIONS]; // size of the children of this node for (unsigned i = 0; i < DIMENSIONS; ++i) size[i] = node->size[i] / 2.0; #ifdef USE_THREADS // Prepare_Threads(SUB_DIVISIONS); // #pragma omp parallel for #endif //USE_THREADS for (unsigned a = 0; a < SUB_DIVISIONS; ++a) { if (node->children[a] == NULL) node->children[a] = Node_Create(node); //Create child node if necessary float x[DIMENSIONS]; // Determine the placement of each child node in space // Annoyingly long switch statement. switch (a) { case 0: x[0] = node->x[0]; x[1] = node->x[1]; x[2] = node->x[2]; break; case 1: x[0] = node->x[0] + size[0]; x[1] = node->x[1]; x[2] = node->x[2]; break; case 2: x[0] = node->x[0]; x[1] = node->x[1] + size[1]; x[2] = node->x[2]; break; case 3: x[0] = node->x[0]; x[1] = node->x[1]; x[2] = node->x[2] + size[2]; break; case 4: x[0] = node->x[0] + size[0]; x[1] = node->x[1] + size[1]; x[2] = node->x[2]; break; case 5: x[0] = node->x[0] + size[0]; x[1] = node->x[1]; x[2] = node->x[2] + size[2]; break; case 6: x[0] = node->x[0]; x[1] = node->x[1] + size[1]; x[2] = node->x[2] + size[2]; break; case 7: x[0] = node->x[0] + size[0]; x[1] = node->x[1] + size[1]; x[2] = node->x[2] + size[2]; break; } Node_SetSpace(node->children[a], x, size); // Setup the child in space unsigned sub = Node_Subdivide(node->children[a], s); // Recurse for the child count += sub; } return count; } /** * @function Node_AddBody * @purpose Terrify third year computer science students * *ahem* Add a Body* to the node's dynamic Body* array. Dynamically. Got that? * I love pointers! They are fun! * @param node - The Node; what did you think it was? * @param b - The Body to add * @returns - The index in the array that the Body* now occupies */ unsigned Node_AddBody(Node * node, Body * b) { unsigned i = 0; for (i = 0; i < node->allocated; ++i) // Search for an empty spot in the array { if (node->body[i] == NULL) // Got one! { node->body[i] = b; // Fill it! node->N += 1; return i; // return! } } // No empty spots found :( // The index got outside the array's allocated space, so allocate some more! node->allocated = 2 * (node->allocated + 1); // Doubling the space each time, like what C++ std::vector does if (node->body == NULL) node->body = (Body**)(calloc(node->allocated, sizeof(Body*))); // First Body* added, create new array else // Subsequent allocations use realloc { // realloc is expensive. That's why I double the space each time, instead of just adding a constant. // Of course, in the worst case, we are wasting half the memory after the last realloc // But performance wise it is better. node->body = (Body**)(realloc(node->body, sizeof(Body*) * node->allocated)); // Resize existing array memset(node->body+i, 0, sizeof(Body*) * (node->allocated - i)); // set all the new elements to NULL } //We now have a valid position in the array that is not filled node->body[i] = b; // So fill it node->N += 1; // don't forget to update this return i; //index in array of Body* } /** * @function Generate_Tree * @purpose Generate the tree from a System * @param s - The System * @param n - The root node * @returns The number of bodies in the node (should be the same as in the system) */ unsigned Generate_Tree(System * s, Node * n) { for (unsigned i = 0; i < DIMENSIONS; ++i) // set initial size of node { n->x[i] = -1.0; n->size[i] = 2.0; } Node_FitSystem(n, s); // size node so that it contains all bodies in the system Node_Subdivide(n, s); return n->N; } /** * @function Node_FitSystem * @purpose Resize a Node until all bodies in a System are within it * @param n - The Node * @param s - The System */ void Node_FitSystem(Node * n, System * s) { // I'm sure there is a faster way of doing this // But I like this algorithm. while (Node_FindBodies(n, s) < s->N) // Find bodies in the node... { // As long as the number found is less than the number in the system... for (unsigned i = 0; i < DIMENSIONS; ++i) { n->x[i] *= 2.0; n->size[i] *= 2.0; // Double the size of the Node! } } } /** * @function Update_Tree * @purpose Update the tree (it must already have been Generated with Generate_Tree) * @param s - The System * @param n - The root node of the tree * @returns The number of bodies in the tree */ unsigned Update_Tree(System * s, Node * n) { Node_FitSystem(n, s); Node_Subdivide(n, s); return n->N; } /** * @function Node_CalculateMass * @purpose Calculate the mass and centre of mass of a node * @param node - The node * @returns The mass of the node */ float Node_CalculateMass(Node * node) { node->mass = 0.0; if (node->N <= 0) // No bodies in the node return 0.0; if (node->N == 1) // Only one body in the node { Body * b = Node_Body(node, 0); node->mass = b->mass; for (unsigned i = 0; i < DIMENSIONS; ++i) node->com[i] = b->x[i]; //printf("Body %p has mass %f, center of mass at %f %f %f\n", (void*)(node), node->mass, node->com[0], node->com[1], node->com[2]); return node->mass; } // For more than 1 body in the node, calculate mass and com from child nodes for (unsigned i = 0; i < DIMENSIONS; ++i) // init com node->com[i] = 0.0; #ifdef USE_THREADS Prepare_Threads(SUB_DIVISIONS); #pragma omp parallel for #endif //USE_THREADS for (unsigned i = 0; i < SUB_DIVISIONS; ++i) { if (node->children[i] != NULL) { float m = Node_CalculateMass(node->children[i]); // Recurse for children node->mass += m; for (unsigned j = 0; j < DIMENSIONS; ++j) node->com[j] += m * node->children[i]->com[j]; } } for (unsigned i = 0; i < DIMENSIONS; ++i) node->com[i] = node->com[i] / node->mass; // weighted average for centre of masses //printf("Node %p has mass %f, center of mass at %f %f %f\n", (void*)(node), node->mass, node->com[0], node->com[1], node->com[2]); return node->mass; } /** * @function Node_Forces * @purpose Calculate the forces on a body due to a Node * @param a - The Body * @param n - The node * @param theta - The approximation factor; 0.0 for no approximation. * @returns The depth reached by the recursive algorithm */ unsigned Node_Forces(Body * a, Node * n, float theta) { if (n->N <= 0) return 0; if (n->N == 1) // external node with one body in it { Body * b = Node_Body(n, 0); if (b == a) //ignore self { return 0; } Body_Force(a, b); // calculate force return 1; } // internal node with more than one body in it... float con; float gd; float d = 0; float s = n->size[0] + n->size[1] + n->size[2] / 3.0; for (unsigned i = 0; i < DIMENSIONS; ++i) d += square(a->x[i] - n->com[i]); d = sqrt(d); if (s / d <= theta) // approximation is "valid" { // Calculate force based on centre of mass of the node con = G * a->mass * n->mass / square(d); gd = con / d; for (unsigned i = 0; i < DIMENSIONS; ++i) a->F[i] += gd * (n->com[i] - a->x[i]); return 1; } // approximation "invalid" because node is too close // Then recursively calculate forces due to the node's children unsigned count = 0; #ifdef USE_THREADS Prepare_Threads(SUB_DIVISIONS); #pragma omp parallel for #endif //USE_THREADS for (unsigned i = 0; i < SUB_DIVISIONS; ++i) { if (n->children[i] != NULL) { count += Node_Forces(a, n->children[i], theta); } } return count; } /** * @function Node_Create * @purpose Create a Node * @param parent - The parent of the Node (NULL if the node is the root of the tree) * @returns The new Node */ Node * Node_Create(Node * parent) { Node * result = (Node*)(calloc(1, sizeof(Node))); result->parent = parent; /* // In theory, using calloc instead of malloc should make everything zero // So I don't need to do it manually // Kept code here anyway. Because I don't trust gcc. result->N = 0; result->allocated = 0; result->body = 0; for (unsigned i = 0; i < SUB_DIVISIONS; ++i) result->children[i] = NULL; */ return result; } /** * @function Node_Body * @purpose Identify a Body within a Node by its position in the array of Body* * Because elements of the array may be NULL in general, n->body[index] may fail * Actually I think in my code it will work in all cases, because I never set elements of the array to NULL. * It's not worth the risk anyway, I might change my code. * * @param n - The Node * @param index - The position of the Body* in the array * @returns the Body* from the array */ Body * Node_Body(Node * n, unsigned index) { unsigned count = 0; Body * result = NULL; // Can't parallelise this, due to the break statement for (unsigned i = 0; i < n->allocated; ++i) // Go through the array... { if (n->body[i] != NULL) count += 1; // Count each non-empty entry if (count > index) { result = n->body[i]; // return element once counter expires break; } } return result; } /** * @function Draw_Node * @purpose Draw a node as a series of rectangular prisms * Draws all children of the node too. But only nodes with N == 1 will be shown. * @param n - The Node */ void Draw_Node(Node * n) { if (n->N == 1) { glBegin(GL_QUADS); glColor3f(0.0f, 1.0f, 0.0f); glVertex3f(n->x[0] + n->size[0], n->x[1] + n->size[1], n->x[2]); glVertex3f(n->x[0], n->x[1] + n->size[1], n->x[2]); glVertex3f(n->x[0], n->x[1] + n->size[1], n->x[2] + n->size[2]); glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]+n->size[2]); glVertex3f(n->x[0] + n->size[0], n->x[1], n->x[2] + n->size[2]); glVertex3f(n->x[0], n->x[1] , n->x[2] + n->size[2]); glVertex3f(n->x[0], n->x[1], n->x[2]); glVertex3f(n->x[0] + n->size[0], n->x[1], n->x[2]); glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]+n->size[2]); glVertex3f(n->x[0], n->x[1]+n->size[1], n->x[2]+n->size[2]); glVertex3f(n->x[0], n->x[1], n->x[2]+n->size[2]); glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2]+n->size[2]); glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2]); glVertex3f(n->x[0], n->x[1], n->x[2]); glVertex3f(n->x[0], n->x[1]+n->size[1], n->x[2]); glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]); glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]); glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2] + n->size[2]); glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2] + n->size[2]); glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2]); glEnd(); } //printf("Node %p contains %u bodies in volume %f\n", (void*)n, n->N, n->size[0] * n->size[1] * n->size[2]); for (unsigned i = 0; i < SUB_DIVISIONS; ++i) { if (n->children[i] != NULL && n->children[i]->N > 0) Draw_Node(n->children[i]); } } /** * @function Node_ContainsBody * @purpose Indicate whether body is within a node * @param n - The Node * @param b - The Body * @returns true iff the body was in the node */ bool Node_ContainsBody(Node * n, Body * b) { if (b == NULL) return false; for (unsigned i = 0; i < DIMENSIONS; ++i) { float p = b->x[i] - n->x[i]; if (p < 0.00 || p > n->size[i]) return false; } return true; } #ifdef USE_THREADS /** * @function Prepare_Threads * @purpose Tell OMP to spawn up to the specified number of threads on the next parallel section * @param max - Spawn at most this many threads; in reality, less will be spawned */ void Prepare_Threads(unsigned max) { //return; if (options.nested_threads == 1) { omp_set_num_threads(1); return; } //printf("Thread %d in nest %d thinking of spawning more threads...\n", omp_get_thread_num(), omp_get_level()); //printf("There are %d nested threads, %d in current level\n", nested_used, omp_get_num_threads()); //printf("Got past check\n"); if (max > options.nested_threads) max = options.nested_threads; if (max <= 0) max = options.nested_threads; unsigned available = options.nested_threads - nested_used; if (available <= 0) available = 1; if (available < max) max = available; omp_set_num_threads(max); omp_set_lock(&nested_lock); nested_used += max-1; omp_unset_lock(&nested_lock); } #endif //USE_THREADS