Modules/IPStack - Fix page fault in TCP, quietened some things
[tpg/acess2.git] / KernelLand / Modules / IPStack / tcp.c
index 28dc2c5..3d4978b 100644 (file)
@@ -11,7 +11,6 @@
 #define USE_SELECT     1
 #define HEXDUMP_INCOMING       0
 #define HEXDUMP_OUTGOING       0
-#define        CACHE_FUTURE_PACKETS_IN_BYTES   1       // Use a ring buffer to cache out of order packets
 
 #define TCP_MIN_DYNPORT        0xC000
 #define TCP_MAX_HALFOPEN       1024    // Should be enough
@@ -22,6 +21,8 @@
 #define TCP_DACK_THRESHOLD     4096
 #define TCP_DACK_TIMEOUT       500
 
+#define TCP_DEBUG      0       // Set to non-0 to enable TCP packet logging
+
 // === PROTOTYPES ===
 void   TCP_Initialise(void);
 void   TCP_StartConnection(tTCPConnection *Conn);
@@ -30,20 +31,22 @@ void        TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
 void   TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Header, int Length);
 int    TCP_INT_AppendRecieved(tTCPConnection *Connection, const void *Data, size_t Length);
 void   TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection);
-void   TCP_INT_SendACK(tTCPConnection *Connection);
+void   TCP_int_SendDelayedACK(void *ConnPtr);
+void   TCP_INT_SendACK(tTCPConnection *Connection, const char *Reason);
 Uint16 TCP_GetUnusedPort();
  int   TCP_AllocatePort(Uint16 Port);
  int   TCP_DeallocatePort(Uint16 Port);
+tTCPConnection *TCP_int_CreateConnection(tInterface *Interface, enum eTCPConnectionState State);
 // --- Server
 tVFS_Node      *TCP_Server_Init(tInterface *Interface);
  int   TCP_Server_ReadDir(tVFS_Node *Node, int Pos, char Name[FILENAME_MAX]);
-tVFS_Node      *TCP_Server_FindDir(tVFS_Node *Node, const char *Name);
+tVFS_Node      *TCP_Server_FindDir(tVFS_Node *Node, const char *Name, Uint Flags);
  int   TCP_Server_IOCtl(tVFS_Node *Node, int ID, void *Data);
 void   TCP_Server_Close(tVFS_Node *Node);
 // --- Client
 tVFS_Node      *TCP_Client_Init(tInterface *Interface);
-size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffer);
-size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void *Buffer);
+size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffer, Uint Flags);
+size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void *Buffer, Uint Flags);
  int   TCP_Client_IOCtl(tVFS_Node *Node, int ID, void *Data);
 void   TCP_Client_Close(tVFS_Node *Node);
 // --- Helpers
@@ -164,6 +167,7 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
        tTCPListener    *srv;
        tTCPConnection  *conn;
 
+       #if TCP_DEBUG
        Log_Log("TCP", "TCP_GetPacket: <Local>:%i from [%s]:%i, Flags = %s%s%s%s%s%s%s%s",
                ntohs(hdr->DestPort),
                IPStack_PrintAddress(Interface->Type, Address),
@@ -177,6 +181,7 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
                (hdr->Flags & TCP_FLAG_SYN) ? "SYN " : "",
                (hdr->Flags & TCP_FLAG_FIN) ? "FIN " : ""
                );
+       #endif
 
        if( Length > (hdr->DataOffset >> 4)*4 )
        {
@@ -236,11 +241,9 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
                
                // TODO: Check for halfopen max
                
-               conn = calloc(1, sizeof(tTCPConnection));
-               conn->State = TCP_ST_SYN_RCVD;
+               conn = TCP_int_CreateConnection(Interface, TCP_ST_SYN_RCVD);
                conn->LocalPort = srv->Port;
                conn->RemotePort = ntohs(hdr->SourcePort);
-               conn->Interface = Interface;
                
                switch(Interface->Type)
                {
@@ -248,17 +251,10 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
                case 6: conn->RemoteIP.v6 = *(tIPv6*)Address;   break;
                }
                
-               conn->RecievedBuffer = RingBuffer_Create( TCP_RECIEVE_BUFFER_SIZE );
-               
                conn->NextSequenceRcv = ntohl( hdr->SequenceNumber ) + 1;
                conn->NextSequenceSend = rand();
                
-               // Create node
-               conn->Node.NumACLs = 1;
-               conn->Node.ACLs = &gVFS_ACL_EveryoneRW;
-               conn->Node.ImplPtr = conn;
                conn->Node.ImplInt = srv->NextID ++;
-               conn->Node.Type = &gTCP_ClientNodeType; // TODO: Special type for the server end?
                
                // Hmm... Theoretically, this lock will never have to wait,
                // as the interface is locked to the watching thread, and this
@@ -336,7 +332,7 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
        if(Header->Flags & TCP_FLAG_SYN) {
                // TODO: What if the packet also has data?
                if( Connection->LastACKSequence != Connection->NextSequenceRcv )
-                       TCP_INT_SendACK(Connection);
+                       TCP_INT_SendACK(Connection, "SYN");
                Connection->NextSequenceRcv = ntohl(Header->SequenceNumber);
                Connection->LastACKSequence = Connection->NextSequenceRcv;
        }
@@ -350,7 +346,9 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
        // Get length of data
        dataLen = Length - (Header->DataOffset>>4)*4;
        LOG("dataLen = %i", dataLen);
+       #if TCP_DEBUG
        Log_Debug("TCP", "State %i, dataLen = %x", Connection->State, dataLen);
+       #endif
        
        // 
        // State Machine
@@ -426,7 +424,7 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        }
                        Connection->NextSequenceRcv ++; // TODO: Is this right? (empty packet counts as one byte)
                        Log_Log("TCP", "Empty Packet, inc and ACK the current sequence number");
-                       TCP_INT_SendACK(Connection);
+                       TCP_INT_SendACK(Connection, "Empty");
                        #if 0
                        Header->DestPort = Header->SourcePort;
                        Header->SourcePort = htons(Connection->LocalPort);
@@ -477,14 +475,14 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        // - Only send an ACK if we've had a burst
                        if( Connection->NextSequenceRcv > (Uint32)(TCP_DACK_THRESHOLD + Connection->LastACKSequence) )
                        {
-                               TCP_INT_SendACK(Connection);
+                               TCP_INT_SendACK(Connection, "DACK Burst");
                                // - Extend TCP deferred ACK timer
                                Time_RemoveTimer(Connection->DeferredACKTimer);
                        }
                        // - Schedule the deferred ACK timer (if already scheduled, this is a NOP)
                        Time_ScheduleTimer(Connection->DeferredACKTimer, TCP_DACK_TIMEOUT);
                        #else
-                       TCP_INT_SendACK(Connection);
+                       TCP_INT_SendACK(Connection, "RX");
                        #endif
                }
                // Check if the packet is in window
@@ -494,10 +492,9 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        Uint8   *dataptr = (Uint8*)Header + (Header->DataOffset>>4)*4;
                        #if CACHE_FUTURE_PACKETS_IN_BYTES
                        Uint32  index;
-                        int    i;
                        
                        index = sequence_num % TCP_WINDOW_SIZE;
-                       for( i = 0; i < dataLen; i ++ )
+                       for( int i = 0; i < dataLen; i ++ )
                        {
                                Connection->FuturePacketValidBytes[index/8] |= 1 << (index%8);
                                Connection->FuturePacketData[index] = dataptr[i];
@@ -557,7 +554,7 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        Log_Log("TCP", "Fully out of sequence packet (0x%08x not between 0x%08x and 0x%08x), dropped",
                                sequence_num, Connection->NextSequenceRcv, Connection->NextSequenceRcv+TCP_WINDOW_SIZE);
                        // Spec says we should send an empty ACK with the current state
-                       TCP_INT_SendACK(Connection);
+                       TCP_INT_SendACK(Connection, "Bad Seq");
                }
                break;
        
@@ -691,26 +688,29 @@ int TCP_INT_AppendRecieved(tTCPConnection *Connection, const void *Data, size_t
 void TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection)
 {
        #if CACHE_FUTURE_PACKETS_IN_BYTES
-        int    i, length = 0;
-       Uint32  index;
-       
        // Calculate length of contiguous bytes
-       length = Connection->HighestSequenceRcvd - Connection->NextSequenceRcv;
-       index = Connection->NextSequenceRcv % TCP_WINDOW_SIZE;
-       for( i = 0; i < length; i ++ )
+        int    length = Connection->HighestSequenceRcvd - Connection->NextSequenceRcv;
+       Uint32  index = Connection->NextSequenceRcv % TCP_WINDOW_SIZE;
+       for( int i = 0; i < length; i ++ )
        {
-               if( Connection->FuturePacketValidBytes[i / 8] == 0xFF ) {
-                       i += 7; index += 7;
-                       continue;
-               }
-               else if( !(Connection->FuturePacketValidBytes[i / 8] & (1 << (i%8))) )
+                int    bit = index % 8;
+               Uint8   bitfield_byte = Connection->FuturePacketValidBytes[index / 8];
+               if( (bitfield_byte & (1 << bit)) == 0 ) {
+                       length = i;
                        break;
-               
-               index ++;
+               }
+
+               if( bitfield_byte == 0xFF ) {
+                        int    inc = 8 - bit;
+                       i += inc - 1;
+                       index += inc;
+               }
+               else {
+                       index ++;
+               }
                if(index > TCP_WINDOW_SIZE)
                        index -= TCP_WINDOW_SIZE;
        }
-       length = i;
        
        index = Connection->NextSequenceRcv % TCP_WINDOW_SIZE;
        
@@ -790,14 +790,19 @@ void TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection)
                SHORTREL( &Connection->lFuturePackets );
                
                // Looks like we found one
-               TCP_INT_AppendRecieved(Connection, pkt);
+               TCP_INT_AppendRecieved(Connection, pkt->Data, pkt->Length);
                Connection->NextSequenceRcv += pkt->Length;
                free(pkt);
        }
        #endif
 }
 
-void TCP_INT_SendACK(tTCPConnection *Connection)
+void TCP_int_SendDelayedACK(void *ConnPtr)	// Deferred-ACK timer callback; ConnPtr is the tTCPConnection registered with Time_AllocateTimer
+{
+       TCP_INT_SendACK(ConnPtr, "DACK Timeout");	// Typed wrapper so TCP_INT_SendACK (which now takes a Reason) need not be cast to void(*)(void*)
+}
+
+void TCP_INT_SendACK(tTCPConnection *Connection, const char *Reason)
 {
        tTCPHeader      hdr;
        // ACK Packet
@@ -810,7 +815,7 @@ void TCP_INT_SendACK(tTCPConnection *Connection)
        hdr.Flags = TCP_FLAG_ACK;       // TODO: Determine if SYN is wanted too
        hdr.Checksum = 0;       // TODO: Checksum
        hdr.UrgentPointer = 0;
-       Log_Debug("TCP", "Sending ACK for 0x%08x", Connection->NextSequenceRcv);
+       Log_Debug("TCP", "Sending ACK for 0x%08x (%s)", Connection->NextSequenceRcv, Reason);
        TCP_SendPacket( Connection, &hdr, 0, NULL );
        //Connection->NextSequenceSend ++;
        Connection->LastACKSequence = Connection->NextSequenceRcv;
@@ -873,6 +878,38 @@ int TCP_DeallocatePort(Uint16 Port)
        return 1;
 }
 
+tTCPConnection *TCP_int_CreateConnection(tInterface *Interface, enum eTCPConnectionState State)	// Allocate and initialise a connection on Interface in state State; used by TCP_Client_Init and the SYN path of TCP_GetPacket
+{
+       tTCPConnection  *conn = calloc( sizeof(tTCPConnection) + TCP_WINDOW_SIZE + TCP_WINDOW_SIZE/8, 1 );	// One zeroed allocation: struct + TCP_WINDOW_SIZE data bytes + TCP_WINDOW_SIZE/8 bitmap bytes. NOTE(review): result is dereferenced without a NULL check
+
+       conn->State = State;
+       conn->Interface = Interface;
+       conn->LocalPort = -1;	// Placeholder; TCP_GetPacket overwrites it for incoming connections
+       conn->RemotePort = -1;	// Placeholder; TCP_GetPacket overwrites it for incoming connections
+
+       conn->Node.ReferenceCount = 1;	// Initial reference; TCP_Client_Close only decrements while the count is above 1
+       conn->Node.ImplPtr = conn;	// Node handlers recover the connection through Node->ImplPtr
+       conn->Node.NumACLs = 1;
+       conn->Node.ACLs = &gVFS_ACL_EveryoneRW;
+       conn->Node.Type = &gTCP_ClientNodeType;
+       conn->Node.BufferFull = 1;      // Cleared when connection opens
+
+       conn->RecievedBuffer = RingBuffer_Create( TCP_RECIEVE_BUFFER_SIZE );
+       #if 0
+       conn->SentBuffer = RingBuffer_Create( TCP_SEND_BUFFER_SIZE );
+       Semaphore_Init(conn->SentBufferSpace, 0, TCP_SEND_BUFFER_SIZE, "TCP SentBuffer", conn->Name);
+       #endif
+       
+       #if CACHE_FUTURE_PACKETS_IN_BYTES
+       // Future recieved data (ahead of the expected sequence number)
+       conn->FuturePacketData = (Uint8*)conn + sizeof(tTCPConnection);	// Starts immediately after the struct, inside the extra space allocated above
+       conn->FuturePacketValidBytes = conn->FuturePacketData + TCP_WINDOW_SIZE;	// One-bit-per-byte validity bitmap, placed after the TCP_WINDOW_SIZE data bytes
+       #endif
+
+       conn->DeferredACKTimer = Time_AllocateTimer( TCP_int_SendDelayedACK, conn);	// Deferred-ACK timer; it is scheduled elsewhere with Time_ScheduleTimer(..., TCP_DACK_TIMEOUT)
+       return conn;
+}
+
 // --- Server
 tVFS_Node *TCP_Server_Init(tInterface *Interface)
 {
@@ -950,7 +987,7 @@ int TCP_Server_ReadDir(tVFS_Node *Node, int Pos, char Dest[FILENAME_MAX])
  * \param Node Server node
  * \param Name Hexadecimal ID of the node
  */
-tVFS_Node *TCP_Server_FindDir(tVFS_Node *Node, const char *Name)
+tVFS_Node *TCP_Server_FindDir(tVFS_Node *Node, const char *Name, Uint Flags)
 {
        tTCPConnection  *conn;
        tTCPListener    *srv = Node->ImplPtr;
@@ -1065,32 +1102,7 @@ void TCP_Server_Close(tVFS_Node *Node)
  */
 tVFS_Node *TCP_Client_Init(tInterface *Interface)
 {
-       tTCPConnection  *conn = calloc( sizeof(tTCPConnection) + TCP_WINDOW_SIZE + TCP_WINDOW_SIZE/8, 1 );
-
-       conn->State = TCP_ST_CLOSED;
-       conn->Interface = Interface;
-       conn->LocalPort = -1;
-       conn->RemotePort = -1;
-
-       conn->Node.ImplPtr = conn;
-       conn->Node.NumACLs = 1;
-       conn->Node.ACLs = &gVFS_ACL_EveryoneRW;
-       conn->Node.Type = &gTCP_ClientNodeType;
-       conn->Node.BufferFull = 1;      // Cleared when connection opens
-
-       conn->RecievedBuffer = RingBuffer_Create( TCP_RECIEVE_BUFFER_SIZE );
-       #if 0
-       conn->SentBuffer = RingBuffer_Create( TCP_SEND_BUFFER_SIZE );
-       Semaphore_Init(conn->SentBufferSpace, 0, TCP_SEND_BUFFER_SIZE, "TCP SentBuffer", conn->Name);
-       #endif
-       
-       #if CACHE_FUTURE_PACKETS_IN_BYTES
-       // Future recieved data (ahead of the expected sequence number)
-       conn->FuturePacketData = (Uint8*)conn + sizeof(tTCPConnection);
-       conn->FuturePacketValidBytes = conn->FuturePacketData + TCP_WINDOW_SIZE;
-       #endif
-
-       conn->DeferredACKTimer = Time_AllocateTimer( (void(*)(void*)) TCP_INT_SendACK, conn);
+       tTCPConnection  *conn = TCP_int_CreateConnection(Interface, TCP_ST_CLOSED);
 
        SHORTLOCK(&glTCP_OutbountCons);
        conn->Next = gTCP_OutbountCons;
@@ -1105,7 +1117,7 @@ tVFS_Node *TCP_Client_Init(tInterface *Interface)
  * \note If \a Length is smaller than the size of the packet, the rest
  *       of the packet's data will be discarded.
  */
-size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffer)
+size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffer, Uint Flags)
 {
        tTCPConnection  *conn = Node->ImplPtr;
        size_t  len;
@@ -1123,6 +1135,7 @@ size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffe
                
                if( len == 0 ) {
                        VFS_MarkAvaliable(Node, 0);
+                       errno = 0;
                        LEAVE('i', -1);
                        return -1;
                }
@@ -1132,7 +1145,17 @@ size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffe
        }
        
        // Wait
-       VFS_SelectNode(Node, VFS_SELECT_READ|VFS_SELECT_ERROR, NULL, "TCP_Client_Read");
+       {
+               tTime   *timeout = NULL;
+               tTime   timeout_zero = 0;
+               if( Flags & VFS_IOFLAG_NOBLOCK )
+                       timeout = &timeout_zero;
+               if( !VFS_SelectNode(Node, VFS_SELECT_READ|VFS_SELECT_ERROR, timeout, "TCP_Client_Read") ) {
+                       errno = EWOULDBLOCK;
+                       LEAVE('i', -1);
+                       return -1;
+               }
+       }
        
        // Lock list and read as much as possible (up to `Length`)
        Mutex_Acquire( &conn->lRecievedPackets );
@@ -1157,6 +1180,9 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
 {
        char    buf[sizeof(tTCPHeader)+Length];
        tTCPHeader      *packet = (void*)buf;
+
+       // - Stop Delayed ACK timer (as this data packet ACKs)
+       Time_RemoveTimer(Connection->DeferredACKTimer);
        
        packet->SourcePort = htons(Connection->LocalPort);
        packet->DestPort = htons(Connection->RemotePort);
@@ -1166,6 +1192,7 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
        packet->AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
        packet->SequenceNumber = htonl(Connection->NextSequenceSend);
        packet->Flags = TCP_FLAG_PSH|TCP_FLAG_ACK;      // Hey, ACK if you can!
+       packet->UrgentPointer = 0;
        
        memcpy(packet->Options, Data, Length);
        
@@ -1182,7 +1209,7 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
 /**
  * \brief Send some bytes on a connection
  */
-size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void *Buffer)
+size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void *Buffer, Uint Flags)
 {
        tTCPConnection  *conn = Node->ImplPtr;
        size_t  rem = Length;
@@ -1197,12 +1224,23 @@ size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void
        // Don't allow a write to a closed connection
        if( conn->State > TCP_ST_OPEN ) {
                VFS_MarkError(Node, 1);
+               errno = 0;
                LEAVE('i', -1);
                return -1;
        }
        
        // Wait
-       VFS_SelectNode(Node, VFS_SELECT_WRITE|VFS_SELECT_ERROR, NULL, "TCP_Client_Write");
+       {
+               tTime   *timeout = NULL;
+               tTime   timeout_zero = 0;
+               if( Flags & VFS_IOFLAG_NOBLOCK )
+                       timeout = &timeout_zero;
+               if( !VFS_SelectNode(Node, VFS_SELECT_WRITE|VFS_SELECT_ERROR, timeout, "TCP_Client_Write") ) {
+                       errno = EWOULDBLOCK;
+                       LEAVE('i', -1);
+                       return -1;
+               }
+       }
        
        do
        {
@@ -1333,6 +1371,15 @@ void TCP_Client_Close(tVFS_Node *Node)
        
        ENTER("pNode", Node);
        
+       ASSERT(Node->ReferenceCount != 0);
+
+       if( Node->ReferenceCount > 1 ) {
+               Node->ReferenceCount --;
+               LOG("Dereference only");
+               LEAVE('-');
+               return ;
+       }
+       
        if( conn->State == TCP_ST_CLOSE_WAIT || conn->State == TCP_ST_OPEN )
        {
                packet.SourcePort = htons(conn->LocalPort);
@@ -1349,6 +1396,9 @@ void TCP_Client_Close(tVFS_Node *Node)
        
        switch( conn->State )
        {
+       case TCP_ST_CLOSED:
+               Log_Warning("TCP", "Closing connection that was never opened");
+               break;
        case TCP_ST_CLOSE_WAIT:
                conn->State = TCP_ST_LAST_ACK;
                break;

UCC git Repository :: git.ucc.asn.au