Parallel Programming - Final version
[matches/honours.git] / course / semester2 / pprog / assignment1 / nbody-bh / tree.c
diff --git a/course/semester2/pprog/assignment1/nbody-bh/tree.c b/course/semester2/pprog/assignment1/nbody-bh/tree.c
new file mode 100644 (file)
index 0000000..f7c9b22
--- /dev/null
@@ -0,0 +1,590 @@
+/**
+ * @file "tree.c"
+ * @purpose Implementation of Barnes Hut algorithms
+ * @author Sam Moore (20503628) 2012
+ */
+
+#include "tree.h"
+#include "math.h"
+#include <string.h>
+#include "graphics.h"
+
+#ifdef USE_THREADS
+unsigned nested_used = 0;
+#endif //USE_THREADS
+
+
+/**
+ * @function Node_SetSpace
+ * @purpose Set the position and space occupied by a node
+ * @param node - The Node
+ * @param x - The vertex with the smallest coordinate values
+ * @param size - The dimensions of the cube that the Node represents
+ */
+void Node_SetSpace(Node * node, float x[DIMENSIONS], float size[DIMENSIONS])
+{
+       for (unsigned i = 0; i < DIMENSIONS; ++i)
+       {
+               node->x[i] = x[i];
+               node->size[i] = size[i];
+       }
+}
+
+/**
+ * @function Node_Destroy
+ * @purpose Confuse Java programmers
+ *     *ahem* Cleanup the memory used by the Node and its children
+ * @param node - The Node
+ */
+void Node_Destroy(Node * node)
+{
+       
+       if (node == NULL)
+               return;
+
+       //printf("Destroy node %p\n", (void*)node);
+
+       if (node->body != NULL)
+       {
+               free(node->body);
+               node->body = NULL;
+       }
+       node->allocated = 0;
+       node->N = 0;
+
+       #ifdef USE_THREADS
+               Prepare_Threads(SUB_DIVISIONS);
+       #pragma omp parallel for
+       #endif //USE_THREADS
+       for (unsigned i = 0; i < SUB_DIVISIONS; ++i)
+       {
+               Node_Destroy(node->children[i]);
+               free(node->children[i]);
+               node->children[i] = NULL;
+       }
+       
+}
+
+/**
+ * @function Node_FindBodies
+ * @purpose Search for bodies in a system within a Node, and add them to the Node's array of bodies.
+ * @param n The Node
+ * @param s The System
+ * @returns The total number of bodies found from s within n
+ */
+unsigned Node_FindBodies(Node * n, System * s)
+{
+
+       if (n->body != NULL)
+       {
+
+               // reset the body array first
+               #ifdef USE_THREADS
+               //Prepare_Threads(options.nested_threads);
+               //#pragma omp parallel for
+               #endif //USE_THREADS
+               for (unsigned i = 0; i < n->allocated; ++i)
+                       n->body[i] = NULL;
+       }
+       n->N = 0; // Don't forget this!
+
+       if (n->parent == NULL)
+       {
+               #ifdef USE_THREADS
+               //Prepare_Threads(options.nested_threads);
+               //#pragma omp parallel for
+               #endif //USE_THREADS
+               for (unsigned a = 0; a < s->N; ++a)
+               {
+                       if (Node_ContainsBody(n, s->body+a))
+                               Node_AddBody(n, s->body+a);
+               }
+       }
+       else
+       {
+               #ifdef USE_THREADS
+               //Prepare_Threads(options.nested_threads);
+               //#pragma omp parallel for
+               #endif //USE_THREADS
+               for (unsigned a = 0; a < n->parent->allocated; ++a)
+               {
+                       if (Node_ContainsBody(n, n->parent->body[a]))
+                               Node_AddBody(n, n->parent->body[a]);
+               }
+       }
+       //printf("Node %p contains %d bodies\n", (void*)(n), n->N);
+       return n->N;
+}
+
+/** 
+ * @function Node_Subdivide
+ * @purpose Subdivide a Node into more nodes and fill them with bodies
+ * @param node - The Node
+ * @param s - The system which the bodies are from 
+ * @returns The number of bodies contained in all children of node after this is finished
+ */
+unsigned Node_Subdivide(Node * node, System * s)
+{
+       if (Node_FindBodies(node, s) <= 1) // First find all bodies in the node
+               return node->N; // If there was one or less, no need to subdivide
+
+       unsigned count = 0; // stores the return value of the function
+       float size[DIMENSIONS]; // size of the children of this node
+       for (unsigned i = 0; i < DIMENSIONS; ++i)
+               size[i] = node->size[i] / 2.0;
+
+       #ifdef USE_THREADS
+       //      Prepare_Threads(SUB_DIVISIONS);
+       // #pragma omp parallel for
+       #endif //USE_THREADS
+       for (unsigned a = 0; a < SUB_DIVISIONS; ++a)
+       {
+               if (node->children[a] == NULL)
+                       node->children[a] = Node_Create(node);  //Create child node if necessary
+
+               float x[DIMENSIONS];
+
+               // Determine the placement of each child node in space
+               // Annoyingly long switch statement.
+               switch (a)
+               {
+                       case 0:
+                               x[0] = node->x[0];
+                               x[1] = node->x[1];
+                               x[2] = node->x[2];
+                               break;
+                       case 1:
+                               x[0] = node->x[0] + size[0];
+                               x[1] = node->x[1];
+                               x[2] = node->x[2];
+                               break;
+                       case 2:
+                               x[0] = node->x[0];
+                               x[1] = node->x[1] + size[1];
+                               x[2] = node->x[2];
+                               break;
+                       case 3:
+                               x[0] = node->x[0];
+                               x[1] = node->x[1];
+                               x[2] = node->x[2] + size[2];
+                               break;
+                       case 4:
+                               x[0] = node->x[0] + size[0];
+                               x[1] = node->x[1] + size[1];
+                               x[2] = node->x[2];
+                               break;
+                       case 5:
+                               x[0] = node->x[0] + size[0];
+                               x[1] = node->x[1];
+                               x[2] = node->x[2] + size[2];
+
+                               break;
+                       case 6:
+                               x[0] = node->x[0];
+                               x[1] = node->x[1] + size[1];
+                               x[2] = node->x[2] + size[2];
+                               break;
+                       case 7:
+                               x[0] = node->x[0] + size[0];
+                               x[1] = node->x[1] + size[1];
+                               x[2] = node->x[2] + size[2];
+                               break;
+               }
+
+               Node_SetSpace(node->children[a], x, size); // Setup the child in space
+               unsigned sub = Node_Subdivide(node->children[a], s); // Recurse for the child
+               count += sub;
+       }
+       return count;
+}
+
+
+/**
+ * @function Node_AddBody
+ * @purpose Terrify third year computer science students
+ *     *ahem* Add a Body* to the node's dynamic Body* array. Dynamically. Got that?
+ *     I love pointers! They are fun!
+ * @param node - The Node; what did you think it was?
+ * @param b - The Body to add
+ * @returns - The index in the array that the Body* now occupies
+ */
+unsigned Node_AddBody(Node * node, Body * b)
+{
+       unsigned i = 0;
+       for (i = 0; i < node->allocated; ++i) // Search for an empty spot in the array
+       {
+               if (node->body[i] == NULL) // Got one!
+               {
+                       node->body[i] = b; // Fill it!
+                       node->N += 1;
+                       return i; // return!
+               }
+       }
+
+       // No empty spots found :(
+       // The index got outside the array's allocated space, so allocate some more!
+       node->allocated = 2 * (node->allocated + 1); // Doubling the space each time, like what C++ std::vector does
+
+       if (node->body == NULL) 
+               node->body = (Body**)(calloc(node->allocated, sizeof(Body*))); // First Body* added, create new array
+       else // Subsequent allocations use realloc
+       {
+               // realloc is expensive. That's why I double the space each time, instead of just adding a constant.
+               // Of course, in the worst case, we are wasting half the memory after the last realloc
+               // But performance wise it is better.
+               node->body = (Body**)(realloc(node->body, sizeof(Body*) * node->allocated)); // Resize existing array
+               memset(node->body+i, 0, sizeof(Body*) * (node->allocated - i)); // set all the new elements to NULL
+       }
+
+       //We now have a valid position in the array that is not filled
+       node->body[i] = b; // So fill it
+       node->N += 1; // don't forget to update this
+       return i; //index in array of Body*
+       
+}
+
+/**
+ * @function Generate_Tree
+ * @purpose Generate the tree from a System
+ * @param s - The System
+ * @param n - The root node
+ * @returns The number of bodies in the node (should be the same as in the system)
+ */
+unsigned Generate_Tree(System * s, Node * n)
+{
+       for (unsigned i = 0; i < DIMENSIONS; ++i) // set initial size of node
+       {
+               n->x[i] = -1.0;
+               n->size[i] = 2.0;
+       }
+
+       Node_FitSystem(n, s); // size node so that it contains all bodies in the system
+
+       
+       Node_Subdivide(n, s);
+
+       return n->N;
+}
+
+/**
+ * @function Node_FitSystem
+ * @purpose Resize a Node until all bodies in a System are within it
+ * @param n - The Node
+ * @param s - The System
+ */
+void Node_FitSystem(Node * n, System * s)
+{
+       // I'm sure there is a faster way of doing this
+       // But I like this algorithm. 
+
+
+       while (Node_FindBodies(n, s) < s->N) // Find bodies in the node...
+       {
+               // As long as the number found is less than the number in the system...
+               
+               for (unsigned i = 0; i < DIMENSIONS; ++i)
+               {
+                       n->x[i] *= 2.0;
+                       n->size[i] *= 2.0; // Double the size of the Node!
+               }
+       }
+}
+
+/**
+ * @function Update_Tree
+ * @purpose Update the tree (it must already have been Generated with Generate_Tree)
+ * @param s - The System
+ * @param n - The root node of the tree
+ * @returns The number of bodies in the tree
+ */
+unsigned Update_Tree(System * s, Node * n)
+{
+       Node_FitSystem(n, s);
+
+       Node_Subdivide(n, s);
+       return n->N;
+}
+
+/**
+ * @function Node_CalculateMass
+ * @purpose Calculate the mass and centre of mass of a node
+ * @param node - The node
+ * @returns The mass of the node
+ */
+float Node_CalculateMass(Node * node)
+{
+       node->mass = 0.0;
+       if (node->N <= 0) // No bodies in the node
+               return 0.0;
+
+       if (node->N == 1) // Only one body in the node
+       {
+               Body * b = Node_Body(node, 0);
+               node->mass = b->mass;
+               for (unsigned i = 0; i < DIMENSIONS; ++i)
+                       node->com[i] = b->x[i];
+       //printf("Body %p has mass %f, center of mass at %f %f %f\n", (void*)(node), node->mass, node->com[0], node->com[1], node->com[2]);
+               return node->mass;
+       }
+
+
+       // For more than 1 body in the node, calculate mass and com from child nodes
+
+       for (unsigned i = 0; i < DIMENSIONS; ++i) // init com
+               node->com[i] = 0.0;
+
+
+       #ifdef USE_THREADS
+               Prepare_Threads(SUB_DIVISIONS);
+       #pragma omp parallel for
+       #endif //USE_THREADS
+       for (unsigned i = 0; i < SUB_DIVISIONS; ++i)
+       {
+               if (node->children[i] != NULL)
+               {
+                       float m = Node_CalculateMass(node->children[i]); // Recurse for children
+                       node->mass += m;
+                       for (unsigned j = 0; j < DIMENSIONS; ++j)
+                               node->com[j] += m * node->children[i]->com[j];                  
+               }
+       }
+       for (unsigned i = 0; i < DIMENSIONS; ++i)
+               node->com[i] = node->com[i] / node->mass; // weighted average for centre of masses
+
+       //printf("Node %p has mass %f, center of mass at %f %f %f\n", (void*)(node), node->mass, node->com[0], node->com[1], node->com[2]);
+       return node->mass;
+}
+
+/**
+ * @function Node_Forces
+ * @purpose Calculate the forces on a body due to a Node
+ * @param a - The Body
+ * @param n - The node
+ * @param theta - The approximation factor; 0.0 for no approximation.
+ * @returns The depth reached by the recursive algorithm 
+ */
+unsigned Node_Forces(Body * a, Node * n, float theta)
+{
+       if (n->N <= 0)
+               return 0;
+
+
+       if (n->N == 1) // external node with one body in it
+       {
+               Body * b = Node_Body(n, 0);
+               if (b == a) //ignore self
+               {
+                       return 0;
+               }
+               Body_Force(a, b); // calculate force
+               return 1;
+       }
+
+       // internal node with more than one body in it...
+       
+
+       float con;
+       float gd;
+       float d = 0;
+       float s = n->size[0] + n->size[1] + n->size[2] / 3.0;
+
+       for (unsigned i = 0; i < DIMENSIONS; ++i)
+               d += square(a->x[i] - n->com[i]);       
+       d = sqrt(d);
+
+       if (s / d <= theta) // approximation is "valid"
+       {
+               // Calculate force based on centre of mass of the node
+               con = G * a->mass * n->mass / square(d);
+               gd = con / d;
+               for (unsigned i = 0; i < DIMENSIONS; ++i)
+                       a->F[i] += gd * (n->com[i] - a->x[i]);
+               return 1;
+       }
+       
+       // approximation "invalid" because node is too close
+       // Then recursively calculate forces due to the node's children
+       
+       unsigned count = 0;
+       #ifdef USE_THREADS
+               Prepare_Threads(SUB_DIVISIONS);
+       #pragma omp parallel for
+       #endif //USE_THREADS
+       for (unsigned i = 0; i < SUB_DIVISIONS; ++i)
+       {
+               if (n->children[i] != NULL)
+               {
+                       count += Node_Forces(a, n->children[i], theta);
+               }
+       }
+       return count;
+}
+
+/**
+ * @function Node_Create
+ * @purpose Create a Node
+ * @param parent - The parent of the Node (NULL if the node is the root of the tree)
+ * @returns The new Node
+ */
+Node * Node_Create(Node * parent)
+{
+       Node * result = (Node*)(calloc(1, sizeof(Node)));
+
+       result->parent = parent;
+
+       /* // In theory, using calloc instead of malloc should make everything zero
+               // So I don't need to do it manually
+               // Kept code here anyway. Because I don't trust gcc.
+       
+       result->N = 0;
+       result->allocated = 0;
+       result->body = 0;
+       for (unsigned i = 0; i < SUB_DIVISIONS; ++i)
+               result->children[i] = NULL;
+       */
+
+       return result;
+       
+}
+
+/**
+ * @function Node_Body
+ * @purpose Identify a Body within a Node by its position in the array of Body*
+ *     Because elements of the array may be NULL in general, n->body[index] may fail
+ *     Actually I think in my code it will work in all cases, because I never set elements of the array to NULL.
+ *     It's not worth the risk anyway, I might change my code.
+ *
+ * @param n - The Node
+ * @param index - The position of the Body* in the array
+ * @returns the Body* from the array
+ */
+Body * Node_Body(Node * n, unsigned index)
+{
+       unsigned count = 0;
+       Body * result = NULL;
+
+       // Can't parallelise this, due to the break statement
+       for (unsigned i = 0; i < n->allocated; ++i) // Go through the array...
+       {
+               if (n->body[i] != NULL)
+                       count += 1; // Count each non-empty entry
+               if (count > index)
+               {
+                       result = n->body[i]; // return element once counter expires
+                       break;
+               }
+       }
+
+       return result;
+}
+
+/**
+ * @function Draw_Node
+ * @purpose Draw a node as a series of rectangular prisms
+ *     Draws all children of the node too. But only nodes with N == 1 will be shown.
+ * @param n - The Node
+ */
+void Draw_Node(Node * n)
+{
+       
+       if (n->N == 1)
+       {
+       glBegin(GL_QUADS);
+       glColor3f(0.0f, 1.0f, 0.0f);
+       glVertex3f(n->x[0] + n->size[0], n->x[1] + n->size[1], n->x[2]);
+       glVertex3f(n->x[0], n->x[1] + n->size[1], n->x[2]);
+       glVertex3f(n->x[0], n->x[1] + n->size[1], n->x[2] + n->size[2]);
+       glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]+n->size[2]);
+
+       glVertex3f(n->x[0] + n->size[0], n->x[1], n->x[2] + n->size[2]);
+       glVertex3f(n->x[0], n->x[1] , n->x[2] + n->size[2]);
+       glVertex3f(n->x[0], n->x[1], n->x[2]);
+       glVertex3f(n->x[0] + n->size[0], n->x[1], n->x[2]);
+
+       glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]+n->size[2]);
+       glVertex3f(n->x[0], n->x[1]+n->size[1], n->x[2]+n->size[2]);
+       glVertex3f(n->x[0], n->x[1], n->x[2]+n->size[2]);
+       glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2]+n->size[2]);
+
+       glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2]);
+       glVertex3f(n->x[0], n->x[1], n->x[2]);
+       glVertex3f(n->x[0], n->x[1]+n->size[1], n->x[2]);
+       glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]);
+
+       glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2]);
+       glVertex3f(n->x[0]+n->size[0], n->x[1]+n->size[1], n->x[2] + n->size[2]);
+       glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2] + n->size[2]);
+       glVertex3f(n->x[0]+n->size[0], n->x[1], n->x[2]);
+       glEnd();
+       }
+       //printf("Node %p contains %u bodies in volume %f\n", (void*)n, n->N, n->size[0] * n->size[1] * n->size[2]);
+       for (unsigned i = 0; i < SUB_DIVISIONS; ++i)
+       {
+               if (n->children[i] != NULL && n->children[i]->N > 0)
+                       Draw_Node(n->children[i]);
+       }
+
+}
+
+/**
+ * @function Node_ContainsBody
+ * @purpose Indicate whether body is within a node
+ * @param n - The Node
+ * @param b - The Body
+ * @returns true iff the body was in the node
+ */
+bool Node_ContainsBody(Node * n, Body * b)
+{
+       if (b == NULL)
+               return false;
+       for (unsigned i = 0; i < DIMENSIONS; ++i)
+       {
+               float p = b->x[i] - n->x[i];
+               if (p < 0.00 || p > n->size[i])
+                       return false;
+       }
+       return true;
+}
+
+// Doesn't work
+#ifdef USE_THREADS
+/**
+ * @function Prepare_Threads
+ * @purpose Tell OMP to spawn up to the specified number of threads on the next parallel section
+ * @param max - Spawn at most this many threads; in reality, less will be spawned
+ */
+void Prepare_Threads(unsigned max)
+{
+       //return;
+       
+       if (options.nested_threads == 1)
+       {
+               omp_set_num_threads(1);
+               return;
+       }
+       //printf("Thread %d in nest %d thinking of spawning more threads...\n", omp_get_thread_num(), omp_get_level());
+       //printf("There are %d nested threads, %d in current level\n", nested_used, omp_get_num_threads());
+       //printf("Got past check\n");
+       
+       if (max > options.nested_threads)
+               max = options.nested_threads;
+       if (max <= 0)
+               max = options.nested_threads;
+
+       
+       
+
+       unsigned available = options.nested_threads - nested_used;
+       if (available <= 0)
+               available = 1;  
+       if (available < max)
+               max = available;
+               
+       omp_set_num_threads(max);
+
+       omp_set_lock(&nested_lock);
+       nested_used += max-1;
+       omp_unset_lock(&nested_lock);
+       
+}
+#endif //USE_THREADS

UCC git Repository :: git.ucc.asn.au