Modules/IPStack - Abstract HW addr cache, IPv6 TX (no ND yet)
[tpg/acess2.git] / KernelLand / Modules / IPStack / tcp.c
index e9c3de9..fcff014 100644 (file)
@@ -2,45 +2,50 @@
  * Acess2 IP Stack
  * - TCP Handling
  */
-#define DEBUG  1
+#define DEBUG  0
 #include "ipstack.h"
 #include "ipv4.h"
 #include "ipv6.h"
 #include "tcp.h"
 
-#define USE_SELECT     1
 #define HEXDUMP_INCOMING       0
 #define HEXDUMP_OUTGOING       0
-#define        CACHE_FUTURE_PACKETS_IN_BYTES   1       // Use a ring buffer to cache out of order packets
 
 #define TCP_MIN_DYNPORT        0xC000
 #define TCP_MAX_HALFOPEN       1024    // Should be enough
 
 #define TCP_MAX_PACKET_SIZE    1024
 #define TCP_WINDOW_SIZE        0x2000
-#define TCP_RECIEVE_BUFFER_SIZE        0x4000
+#define TCP_RECIEVE_BUFFER_SIZE        0x8000
+#define TCP_DACK_THRESHOLD     4096    // Sequence distance past LastACKSequence beyond which an ACK is sent immediately instead of deferred
+#define TCP_DACK_TIMEOUT       500     // Delay handed to Time_ScheduleTimer for the deferred ACK (presumably milliseconds - TODO confirm)
+
+#define TCP_DEBUG      0       // Set to non-0 to enable TCP packet logging
 
 // === PROTOTYPES ===
 void   TCP_Initialise(void);
 void   TCP_StartConnection(tTCPConnection *Conn);
-void   TCP_SendPacket(tTCPConnection *Conn, size_t Length, tTCPHeader *Data);
+void   TCP_SendPacket(tTCPConnection *Conn, tTCPHeader *Header, size_t DataLen, const void *Data);
 void   TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffer);
 void   TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Header, int Length);
 int    TCP_INT_AppendRecieved(tTCPConnection *Connection, const void *Data, size_t Length);
 void   TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection);
+void   TCP_int_SendDelayedACK(void *ConnPtr);
+void   TCP_INT_SendACK(tTCPConnection *Connection, const char *Reason);
 Uint16 TCP_GetUnusedPort();
  int   TCP_AllocatePort(Uint16 Port);
  int   TCP_DeallocatePort(Uint16 Port);
+tTCPConnection *TCP_int_CreateConnection(tInterface *Interface, enum eTCPConnectionState State);
 // --- Server
 tVFS_Node      *TCP_Server_Init(tInterface *Interface);
-char   *TCP_Server_ReadDir(tVFS_Node *Node, int Pos);
-tVFS_Node      *TCP_Server_FindDir(tVFS_Node *Node, const char *Name);
+ int   TCP_Server_ReadDir(tVFS_Node *Node, int Pos, char Name[FILENAME_MAX]);
+tVFS_Node      *TCP_Server_FindDir(tVFS_Node *Node, const char *Name, Uint Flags);
  int   TCP_Server_IOCtl(tVFS_Node *Node, int ID, void *Data);
 void   TCP_Server_Close(tVFS_Node *Node);
 // --- Client
 tVFS_Node      *TCP_Client_Init(tInterface *Interface);
-Uint64 TCP_Client_Read(tVFS_Node *Node, Uint64 Offset, Uint64 Length, void *Buffer);
-Uint64 TCP_Client_Write(tVFS_Node *Node, Uint64 Offset, Uint64 Length, const void *Buffer);
+size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffer, Uint Flags);
+size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void *Buffer, Uint Flags);
  int   TCP_Client_IOCtl(tVFS_Node *Node, int ID, void *Data);
 void   TCP_Client_Close(tVFS_Node *Node);
 // --- Helpers
@@ -81,7 +86,7 @@ Uint32        gaTCP_PortBitmap[0x800];
  */
 void TCP_Initialise(void)
 {
-       giTCP_NextOutPort += rand()%32;
+       giTCP_NextOutPort += rand()%128;
        IPStack_AddFile(&gTCP_ServerFile);
        IPStack_AddFile(&gTCP_ClientFile);
        IPv4_RegisterCallback(IP4PROT_TCP, TCP_GetPacket);
@@ -94,30 +99,42 @@ void TCP_Initialise(void)
  * \param Length       Length of data
  * \param Data Packet data (cast as a TCP Header)
  */
-void TCP_SendPacket( tTCPConnection *Conn, size_t Length, tTCPHeader *Data )
+void TCP_SendPacket( tTCPConnection *Conn, tTCPHeader *Header, size_t Length, const void *Data )
 {
-       Uint16  checksum[2];
-       
-       Data->Checksum = 0;
-       checksum[1] = htons( ~IPv4_Checksum( (void*)Data, Length ) );   // Partial checksum
-       if(Length & 1)
-               ((Uint8*)Data)[Length] = 0;
+       tIPStackBuffer  *buffer;
+       Uint16  checksum[3];
+        int    packlen = sizeof(*Header) + Length;
+       
+       buffer = IPStack_Buffer_CreateBuffer(2 + IPV4_BUFFERS);
+       if( Data && Length )
+               IPStack_Buffer_AppendSubBuffer(buffer, Length, 0, Data, NULL, NULL);
+       IPStack_Buffer_AppendSubBuffer(buffer, sizeof(*Header), 0, Header, NULL, NULL);
+
+       LOG("Sending %i+%i to %s:%i", sizeof(*Header), Length,
+               IPStack_PrintAddress(Conn->Interface->Type, &Conn->RemoteIP),
+               Conn->RemotePort
+               );
+
+       Header->Checksum = 0;
+       checksum[1] = htons( ~IPv4_Checksum(Header, sizeof(tTCPHeader)) );
+       checksum[2] = htons( ~IPv4_Checksum(Data, Length) );
        
        // TODO: Fragment packet
        
        switch( Conn->Interface->Type )
        {
        case 4:
-               // Append IPv4 Pseudo Header
+               // Get IPv4 pseudo-header checksum
                {
                        Uint32  buf[3];
                        buf[0] = ((tIPv4*)Conn->Interface->Address)->L;
                        buf[1] = Conn->RemoteIP.v4.L;
-                       buf[2] = (htons(Length)<<16) | (6<<8) | 0;
+                       buf[2] = (htons(packlen)<<16) | (6<<8) | 0;
                        checksum[0] = htons( ~IPv4_Checksum(buf, sizeof(buf)) );        // Partial checksum
                }
-               Data->Checksum = htons( IPv4_Checksum(checksum, 2*2) ); // Combine the two
-               IPv4_SendPacket(Conn->Interface, Conn->RemoteIP.v4, IP4PROT_TCP, 0, Length, Data);
+               // - Combine checksums
+               Header->Checksum = htons( IPv4_Checksum(checksum, sizeof(checksum)) );
+               IPv4_SendPacket(Conn->Interface, Conn->RemoteIP.v4, IP4PROT_TCP, 0, buffer);
                break;
                
        case 6:
@@ -126,12 +143,12 @@ void TCP_SendPacket( tTCPConnection *Conn, size_t Length, tTCPHeader *Data )
                        Uint32  buf[4+4+1+1];
                        memcpy(buf, Conn->Interface->Address, 16);
                        memcpy(&buf[4], &Conn->RemoteIP, 16);
-                       buf[8] = htonl(Length);
+                       buf[8] = htonl(packlen);
                        buf[9] = htonl(6);
                        checksum[0] = htons( ~IPv4_Checksum(buf, sizeof(buf)) );        // Partial checksum
                }
-               Data->Checksum = htons( IPv4_Checksum(checksum, 2*2) ); // Combine the two
-               IPv6_SendPacket(Conn->Interface, Conn->RemoteIP.v6, IP4PROT_TCP, Length, Data);
+               Header->Checksum = htons( IPv4_Checksum(checksum, sizeof(checksum)) );  // Combine the two
+               IPv6_SendPacket(Conn->Interface, Conn->RemoteIP.v6, IP4PROT_TCP, buffer);
                break;
        }
 }
@@ -149,7 +166,8 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
        tTCPListener    *srv;
        tTCPConnection  *conn;
 
-       Log_Log("TCP", "TCP_GetPacket: <Local>:%i from [%s]:%i, Flags= %s%s%s%s%s%s%s%s",
+       #if TCP_DEBUG
+       Log_Log("TCP", "TCP_GetPacket: <Local>:%i from [%s]:%i, Flags = %s%s%s%s%s%s%s%s",
                ntohs(hdr->DestPort),
                IPStack_PrintAddress(Interface->Type, Address),
                ntohs(hdr->SourcePort),
@@ -162,10 +180,11 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
                (hdr->Flags & TCP_FLAG_SYN) ? "SYN " : "",
                (hdr->Flags & TCP_FLAG_FIN) ? "FIN " : ""
                );
+       #endif
 
        if( Length > (hdr->DataOffset >> 4)*4 )
        {
-               Log_Log("TCP", "TCP_GetPacket: SequenceNumber = 0x%x", ntohl(hdr->SequenceNumber));
+               LOG("SequenceNumber = 0x%x", ntohl(hdr->SequenceNumber));
 #if HEXDUMP_INCOMING
                Debug_HexDump(
                        "TCP_GetPacket: Packet Data = ",
@@ -176,107 +195,97 @@ void TCP_GetPacket(tInterface *Interface, void *Address, int Length, void *Buffe
        }
 
        // Check Servers
+       for( srv = gTCP_Listeners; srv; srv = srv->Next )
        {
-               for( srv = gTCP_Listeners; srv; srv = srv->Next )
+               // Check if the server is active
+               if(srv->Port == 0)      continue;
+               // Check the interface
+               if(srv->Interface && srv->Interface != Interface)       continue;
+               // Check the destination port
+               if(srv->Port != htons(hdr->DestPort))   continue;
+               
+               Log_Log("TCP", "TCP_GetPacket: Matches server %p", srv);
+               // Is this in an established connection?
+               for( conn = srv->Connections; conn; conn = conn->Next )
                {
-                       // Check if the server is active
-                       if(srv->Port == 0)      continue;
-                       // Check the interface
-                       if(srv->Interface && srv->Interface != Interface)       continue;
-                       // Check the destination port
-                       if(srv->Port != htons(hdr->DestPort))   continue;
-                       
-                       Log_Log("TCP", "TCP_GetPacket: Matches server %p", srv);
-                       // Is this in an established connection?
-                       for( conn = srv->Connections; conn; conn = conn->Next )
-                       {
-                               // Check that it is coming in on the same interface
-                               if(conn->Interface != Interface)        continue;
-
-                               // Check Source Port
-                               Log_Log("TCP", "TCP_GetPacket: conn->RemotePort(%i) == hdr->SourcePort(%i)",
-                                       conn->RemotePort, ntohs(hdr->SourcePort));
-                               if(conn->RemotePort != ntohs(hdr->SourcePort))  continue;
-
-                               // Check Source IP
-                               Log_Debug("TCP", "TCP_GetPacket: conn->RemoteIP(%s)",
-                                       IPStack_PrintAddress(conn->Interface->Type, &conn->RemoteIP));
-                               Log_Debug("TCP", "                == Address(%s)",
-                                       IPStack_PrintAddress(conn->Interface->Type, Address));
-                               if( IPStack_CompareAddress(conn->Interface->Type, &conn->RemoteIP, Address, -1) == 0 )
-                                       continue ;
-
-                               Log_Log("TCP", "TCP_GetPacket: Matches connection %p", conn);
-                               // We have a response!
-                               TCP_INT_HandleConnectionPacket(conn, hdr, Length);
-
-                               return;
-                       }
+                       // Check that it is coming in on the same interface
+                       if(conn->Interface != Interface)        continue;
 
-                       Log_Log("TCP", "TCP_GetPacket: Opening Connection");
-                       // Open a new connection (well, check that it's a SYN)
-                       if(hdr->Flags != TCP_FLAG_SYN) {
-                               Log_Log("TCP", "TCP_GetPacket: Packet is not a SYN");
-                               return ;
-                       }
-                       
-                       // TODO: Check for halfopen max
-                       
-                       conn = calloc(1, sizeof(tTCPConnection));
-                       conn->State = TCP_ST_SYN_RCVD;
-                       conn->LocalPort = srv->Port;
-                       conn->RemotePort = ntohs(hdr->SourcePort);
-                       conn->Interface = Interface;
-                       
-                       switch(Interface->Type)
-                       {
-                       case 4: conn->RemoteIP.v4 = *(tIPv4*)Address;   break;
-                       case 6: conn->RemoteIP.v6 = *(tIPv6*)Address;   break;
-                       }
-                       
-                       conn->RecievedBuffer = RingBuffer_Create( TCP_RECIEVE_BUFFER_SIZE );
-                       
-                       conn->NextSequenceRcv = ntohl( hdr->SequenceNumber ) + 1;
-                       conn->NextSequenceSend = rand();
-                       
-                       // Create node
-                       conn->Node.NumACLs = 1;
-                       conn->Node.ACLs = &gVFS_ACL_EveryoneRW;
-                       conn->Node.ImplPtr = conn;
-                       conn->Node.ImplInt = srv->NextID ++;
-                       conn->Node.Type = &gTCP_ClientNodeType; // TODO: Special type for the server end?
-                       
-                       // Hmm... Theoretically, this lock will never have to wait,
-                       // as the interface is locked to the watching thread, and this
-                       // runs in the watching thread. But, it's a good idea to have
-                       // it, just in case
-                       // Oh, wait, there is a case where a wildcard can be used
-                       // (srv->Interface == NULL) so having the lock is a good idea
-                       SHORTLOCK(&srv->lConnections);
-                       if( !srv->Connections )
-                               srv->Connections = conn;
-                       else
-                               srv->ConnectionsTail->Next = conn;
-                       srv->ConnectionsTail = conn;
-                       if(!srv->NewConnections)
-                               srv->NewConnections = conn;
-                       VFS_MarkAvaliable( &srv->Node, 1 );
-                       SHORTREL(&srv->lConnections);
-
-                       // Send the SYN ACK
-                       hdr->Flags |= TCP_FLAG_ACK;
-                       hdr->AcknowlegementNumber = htonl(conn->NextSequenceRcv);
-                       hdr->SequenceNumber = htonl(conn->NextSequenceSend);
-                       hdr->DestPort = hdr->SourcePort;
-                       hdr->SourcePort = htons(srv->Port);
-                       hdr->DataOffset = (sizeof(tTCPHeader)/4) << 4;
-                       TCP_SendPacket( conn, sizeof(tTCPHeader), hdr );
-                       conn->NextSequenceSend ++;
+                       // Check Source Port
+                       Log_Log("TCP", "TCP_GetPacket: conn->RemotePort(%i) == hdr->SourcePort(%i)",
+                               conn->RemotePort, ntohs(hdr->SourcePort));
+                       if(conn->RemotePort != ntohs(hdr->SourcePort))  continue;
+
+                       // Check Source IP
+                       Log_Debug("TCP", "TCP_GetPacket: conn->RemoteIP(%s)",
+                               IPStack_PrintAddress(conn->Interface->Type, &conn->RemoteIP));
+                       Log_Debug("TCP", "                == Address(%s)",
+                               IPStack_PrintAddress(conn->Interface->Type, Address));
+                       if( IPStack_CompareAddress(conn->Interface->Type, &conn->RemoteIP, Address, -1) == 0 )
+                               continue ;
+
+                       Log_Log("TCP", "TCP_GetPacket: Matches connection %p", conn);
+                       // We have a response!
+                       TCP_INT_HandleConnectionPacket(conn, hdr, Length);
+
+                       return;
+               }
+
+               Log_Log("TCP", "TCP_GetPacket: Opening Connection");
+               // Open a new connection (well, check that it's a SYN)
+               if(hdr->Flags != TCP_FLAG_SYN) {
+                       Log_Log("TCP", "TCP_GetPacket: Packet is not a SYN");
                        return ;
                }
+               
+               // TODO: Check for halfopen max
+               
+               conn = TCP_int_CreateConnection(Interface, TCP_ST_SYN_RCVD);
+               conn->LocalPort = srv->Port;
+               conn->RemotePort = ntohs(hdr->SourcePort);
+               
+               switch(Interface->Type)
+               {
+               case 4: conn->RemoteIP.v4 = *(tIPv4*)Address;   break;
+               case 6: conn->RemoteIP.v6 = *(tIPv6*)Address;   break;
+               }
+               
+               conn->NextSequenceRcv = ntohl( hdr->SequenceNumber ) + 1;
+               conn->HighestSequenceRcvd = conn->NextSequenceRcv;
+               conn->NextSequenceSend = rand();
+               
+               conn->Node.ImplInt = srv->NextID ++;
+               
+               // Hmm... Theoretically, this lock will never have to wait,
+               // as the interface is locked to the watching thread, and this
+               // runs in the watching thread. But, it's a good idea to have
+               // it, just in case
+               // Oh, wait, there is a case where a wildcard can be used
+               // (srv->Interface == NULL) so having the lock is a good idea
+               SHORTLOCK(&srv->lConnections);
+               if( !srv->Connections )
+                       srv->Connections = conn;
+               else
+                       srv->ConnectionsTail->Next = conn;
+               srv->ConnectionsTail = conn;
+               if(!srv->NewConnections)
+                       srv->NewConnections = conn;
+               VFS_MarkAvaliable( &srv->Node, 1 );
+               SHORTREL(&srv->lConnections);
+               Semaphore_Signal(&srv->WaitingConnections, 1);
+
+               // Send the SYN ACK
+               hdr->Flags |= TCP_FLAG_ACK;
+               hdr->AcknowlegementNumber = htonl(conn->NextSequenceRcv);
+               hdr->SequenceNumber = htonl(conn->NextSequenceSend);
+               hdr->DestPort = hdr->SourcePort;
+               hdr->SourcePort = htons(srv->Port);
+               hdr->DataOffset = (sizeof(tTCPHeader)/4) << 4;
+               TCP_SendPacket( conn, hdr, 0, NULL );
+               conn->NextSequenceSend ++;
+               return ;
        }
 
-
        // Check Open Connections
        {
                for( conn = gTCP_OutbountCons; conn; conn = conn->Next )
@@ -322,18 +331,28 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
        // Syncronise sequence values
        if(Header->Flags & TCP_FLAG_SYN) {
                // TODO: What if the packet also has data?
+               if( Connection->LastACKSequence != Connection->NextSequenceRcv )
+                       TCP_INT_SendACK(Connection, "SYN");
                Connection->NextSequenceRcv = ntohl(Header->SequenceNumber);
+               // TODO: Process HighestSequenceRcvd
+               // HACK!
+               if( Connection->HighestSequenceRcvd == 0 )
+                       Connection->HighestSequenceRcvd = Connection->NextSequenceRcv;
+               Connection->LastACKSequence = Connection->NextSequenceRcv;
        }
        
        // Ackowledge a sent packet
        if(Header->Flags & TCP_FLAG_ACK) {
                // TODO: Process an ACKed Packet
-               Log_Log("TCP", "Conn %p, Sent packet 0x%x ACKed", Connection, Header->AcknowlegementNumber);
+               LOG("Conn %p, Sent packet 0x%x ACKed", Connection, Header->AcknowlegementNumber);
        }
        
        // Get length of data
        dataLen = Length - (Header->DataOffset>>4)*4;
-       Log_Log("TCP", "HandleConnectionPacket - dataLen = %i", dataLen);
+       LOG("dataLen = %i", dataLen);
+       #if TCP_DEBUG
+       Log_Debug("TCP", "State %i, dataLen = %x", Connection->State, dataLen);
+       #endif
        
        // 
        // State Machine
@@ -350,26 +369,29 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
        case TCP_ST_SYN_SENT:
                if( Header->Flags & TCP_FLAG_SYN )
                {
+                       if( Connection->HighestSequenceRcvd == Connection->NextSequenceRcv )
+                               Connection->HighestSequenceRcvd ++;
                        Connection->NextSequenceRcv ++;
-                       Header->DestPort = Header->SourcePort;
-                       Header->SourcePort = htons(Connection->LocalPort);
-                       Header->AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
-                       Header->SequenceNumber = htonl(Connection->NextSequenceSend);
-                       Header->WindowSize = htons(TCP_WINDOW_SIZE);
-                       Header->Flags = TCP_FLAG_ACK;
-                       Header->DataOffset = (sizeof(tTCPHeader)/4) << 4;
-                       TCP_SendPacket( Connection, sizeof(tTCPHeader), Header );
                        
                        if( Header->Flags & TCP_FLAG_ACK )
                        {       
                                Log_Log("TCP", "ACKing SYN-ACK");
                                Connection->State = TCP_ST_OPEN;
+                               VFS_MarkFull(&Connection->Node, 0);
                        }
                        else
                        {
                                Log_Log("TCP", "ACKing SYN");
                                Connection->State = TCP_ST_SYN_RCVD;
                        }
+                       Header->DestPort = Header->SourcePort;
+                       Header->SourcePort = htons(Connection->LocalPort);
+                       Header->AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
+                       Header->SequenceNumber = htonl(Connection->NextSequenceSend);
+                       Header->WindowSize = htons(TCP_WINDOW_SIZE);
+                       Header->Flags = TCP_FLAG_ACK;
+                       Header->DataOffset = (sizeof(tTCPHeader)/4) << 4;
+                       TCP_SendPacket( Connection, Header, 0, NULL );
                }
                break;
        
@@ -378,8 +400,9 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                if( Header->Flags & TCP_FLAG_ACK )
                {
                        // TODO: Handle max half-open limit
-                       Connection->State = TCP_ST_OPEN;
                        Log_Log("TCP", "Connection fully opened");
+                       Connection->State = TCP_ST_OPEN;
+                       VFS_MarkFull(&Connection->Node, 0);
                }
                break;
                
@@ -405,14 +428,12 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                                Log_Log("TCP", "ACK only packet");
                                return ;
                        }
-                       Connection->NextSequenceRcv ++; // TODO: Is this right? (empty packet counts as one byte)
+                       // TODO: Is this right? (empty packet counts as one byte)
+                       if( Connection->HighestSequenceRcvd == Connection->NextSequenceRcv )
+                               Connection->HighestSequenceRcvd ++;
+                       Connection->NextSequenceRcv ++;
                        Log_Log("TCP", "Empty Packet, inc and ACK the current sequence number");
-                       Header->DestPort = Header->SourcePort;
-                       Header->SourcePort = htons(Connection->LocalPort);
-                       Header->AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
-                       Header->SequenceNumber = htonl(Connection->NextSequenceSend);
-                       Header->Flags |= TCP_FLAG_ACK;
-                       TCP_SendPacket( Connection, sizeof(tTCPHeader), Header );
+                       TCP_INT_SendACK(Connection, "Empty");
                        return ;
                }
                
@@ -423,7 +444,7 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                
                sequence_num = ntohl(Header->SequenceNumber);
                
-               Log_Log("TCP", "0x%08x <= 0x%08x < 0x%08x",
+               LOG("0x%08x <= 0x%08x < 0x%08x",
                        Connection->NextSequenceRcv,
                        ntohl(Header->SequenceNumber),
                        Connection->NextSequenceRcv + TCP_WINDOW_SIZE
@@ -439,9 +460,12 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                                dataLen
                                );
                        if(rv != 0) {
+                               Log_Notice("TCP", "TCP_INT_AppendRecieved rv %i", rv);
                                break;
                        }
-                       Log_Log("TCP", "0x%08x += %i", Connection->NextSequenceRcv, dataLen);
+                       LOG("0x%08x += %i", Connection->NextSequenceRcv, dataLen);
+                       if( Connection->HighestSequenceRcvd == Connection->NextSequenceRcv )
+                               Connection->HighestSequenceRcvd += dataLen;
                        Connection->NextSequenceRcv += dataLen;
                        
                        // TODO: This should be moved out of the watcher thread,
@@ -449,18 +473,20 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        // all connections on the interface to lag.
                        // - Meh, no real issue, as the cache shouldn't be that large
                        TCP_INT_UpdateRecievedFromFuture(Connection);
-               
-                       // ACK Packet
-                       Header->DestPort = Header->SourcePort;
-                       Header->SourcePort = htons(Connection->LocalPort);
-                       Header->AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
-                       Header->SequenceNumber = htonl(Connection->NextSequenceSend);
-                       Header->WindowSize = htons(TCP_WINDOW_SIZE);
-                       Header->Flags &= TCP_FLAG_SYN;  // Eliminate all flags save for SYN
-                       Header->Flags |= TCP_FLAG_ACK;  // Add ACK
-                       Log_Log("TCP", "Sending ACK for 0x%08x", Connection->NextSequenceRcv);
-                       TCP_SendPacket( Connection, sizeof(tTCPHeader), Header );
-                       //Connection->NextSequenceSend ++;
+
+                       #if 1
+                       // - Only send an ACK if we've had a burst
+                       if( Connection->NextSequenceRcv > (Uint32)(TCP_DACK_THRESHOLD + Connection->LastACKSequence) )
+                       {
+                               TCP_INT_SendACK(Connection, "DACK Burst");
+                               // - Extend TCP deferred ACK timer
+                               Time_RemoveTimer(Connection->DeferredACKTimer);
+                       }
+                       // - Schedule the deferred ACK timer (if already scheduled, this is a NOP)
+                       Time_ScheduleTimer(Connection->DeferredACKTimer, TCP_DACK_TIMEOUT);
+                       #else
+                       TCP_INT_SendACK(Connection, "RX");
+                       #endif
                }
                // Check if the packet is in window
                else if( WrapBetween(Connection->NextSequenceRcv, sequence_num,
@@ -469,10 +495,9 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        Uint8   *dataptr = (Uint8*)Header + (Header->DataOffset>>4)*4;
                        #if CACHE_FUTURE_PACKETS_IN_BYTES
                        Uint32  index;
-                        int    i;
                        
                        index = sequence_num % TCP_WINDOW_SIZE;
-                       for( i = 0; i < dataLen; i ++ )
+                       for( int i = 0; i < dataLen; i ++ )
                        {
                                Connection->FuturePacketValidBytes[index/8] |= 1 << (index%8);
                                Connection->FuturePacketData[index] = dataptr[i];
@@ -531,7 +556,8 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                {
                        Log_Log("TCP", "Fully out of sequence packet (0x%08x not between 0x%08x and 0x%08x), dropped",
                                sequence_num, Connection->NextSequenceRcv, Connection->NextSequenceRcv+TCP_WINDOW_SIZE);
-                       // TODO: Spec says we should send an empty ACK with the current state
+                       // Spec says we should send an empty ACK with the current state
+                       TCP_INT_SendACK(Connection, "Bad Seq");
                }
                break;
        
@@ -568,7 +594,7 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        Header->SequenceNumber = htonl(Connection->NextSequenceSend);
                        Header->WindowSize = htons(TCP_WINDOW_SIZE);
                        Header->Flags = TCP_FLAG_ACK;
-                       TCP_SendPacket( Connection, sizeof(tTCPHeader), Header );
+                       TCP_SendPacket( Connection, Header, 0, NULL );
                        break ;
                }
                
@@ -594,7 +620,7 @@ void TCP_INT_HandleConnectionPacket(tTCPConnection *Connection, tTCPHeader *Head
                        Header->SequenceNumber = htonl(Connection->NextSequenceSend);
                        Header->WindowSize = htons(TCP_WINDOW_SIZE);
                        Header->Flags = TCP_FLAG_ACK;
-                       TCP_SendPacket( Connection, sizeof(tTCPHeader), Header );
+                       TCP_SendPacket( Connection, Header, 0, NULL );
                }
                break;
        
@@ -665,26 +691,30 @@ int TCP_INT_AppendRecieved(tTCPConnection *Connection, const void *Data, size_t
 void TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection)
 {
        #if CACHE_FUTURE_PACKETS_IN_BYTES
-        int    i, length = 0;
-       Uint32  index;
-       
        // Calculate length of contiguous bytes
-       length = Connection->HighestSequenceRcvd - Connection->NextSequenceRcv;
-       index = Connection->NextSequenceRcv % TCP_WINDOW_SIZE;
-       for( i = 0; i < length; i ++ )
+        int    length = Connection->HighestSequenceRcvd - Connection->NextSequenceRcv;
+       Uint32  index = Connection->NextSequenceRcv % TCP_WINDOW_SIZE;
+       LOG("length=%i, index=%i", length, index);
+       for( int i = 0; i < length; i ++ )
        {
-               if( Connection->FuturePacketValidBytes[i / 8] == 0xFF ) {
-                       i += 7; index += 7;
-                       continue;
-               }
-               else if( !(Connection->FuturePacketValidBytes[i / 8] & (1 << (i%8))) )
+                int    bit = index % 8;
+               Uint8   bitfield_byte = Connection->FuturePacketValidBytes[index / 8];
+               if( (bitfield_byte & (1 << bit)) == 0 ) {
+                       length = i;
                        break;
-               
-               index ++;
+               }
+
+               if( bitfield_byte == 0xFF ) {
+                        int    inc = 8 - bit;
+                       i += inc - 1;
+                       index += inc;
+               }
+               else {
+                       index ++;
+               }
                if(index > TCP_WINDOW_SIZE)
                        index -= TCP_WINDOW_SIZE;
        }
-       length = i;
        
        index = Connection->NextSequenceRcv % TCP_WINDOW_SIZE;
        
@@ -704,10 +734,10 @@ void TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection)
        
        // Mark (now saved) bytes as invalid
        // - Align index
-       while(index % 8 && length)
+       while(index % 8 && length > 0)
        {
                Connection->FuturePacketData[index] = 0;
-               Connection->FuturePacketData[index/8] &= ~(1 << (index%8));
+               Connection->FuturePacketValidBytes[index/8] &= ~(1 << (index%8));
                index ++;
                if(index > TCP_WINDOW_SIZE)
                        index -= TCP_WINDOW_SIZE;
@@ -764,13 +794,39 @@ void TCP_INT_UpdateRecievedFromFuture(tTCPConnection *Connection)
                SHORTREL( &Connection->lFuturePackets );
                
                // Looks like we found one
-               TCP_INT_AppendRecieved(Connection, pkt);
+               TCP_INT_AppendRecieved(Connection, pkt->Data, pkt->Length);
+               if( Connection->HighestSequenceRcvd == Connection->NextSequenceRcv )
+                       Connection->HighestSequenceRcvd += pkt->Length;
                Connection->NextSequenceRcv += pkt->Length;
                free(pkt);
        }
        #endif
 }
 
+void TCP_int_SendDelayedACK(void *ConnPtr)     // Callback registered with Time_AllocateTimer; ConnPtr is the tTCPConnection passed at registration
+{
+       TCP_INT_SendACK((tTCPConnection *)ConnPtr, "DACK Timeout");     // Emit the ACK that was being held back
+}
+
+void TCP_INT_SendACK(tTCPConnection *Connection, const char *Reason)   // Send a data-less ACK on Connection; Reason only appears in the debug log
+{
+       tTCPHeader      ack;
+       // Fill in the header (multi-byte fields converted with htons/htonl)
+       ack.SourcePort = htons(Connection->LocalPort);
+       ack.DestPort = htons(Connection->RemotePort);
+       ack.SequenceNumber = htonl(Connection->NextSequenceSend);
+       ack.AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
+       ack.DataOffset = (sizeof(tTCPHeader)/4) << 4;
+       ack.Flags = TCP_FLAG_ACK;       // TODO: Determine if SYN is wanted too
+       ack.WindowSize = htons(TCP_WINDOW_SIZE);
+       ack.Checksum = 0;       // TODO: Checksum
+       ack.UrgentPointer = 0;
+       Log_Debug("TCP", "Sending ACK for 0x%08x (%s)", Connection->NextSequenceRcv, Reason);
+       TCP_SendPacket( Connection, &ack, 0, NULL );
+       // No payload was sent, so NextSequenceSend is deliberately left unchanged
+       Connection->LastACKSequence = Connection->NextSequenceRcv;      // Record the sequence number just acknowledged
+}
+
 /**
  * \fn Uint16 TCP_GetUnusedPort()
  * \brief Gets an unused port and allocates it
@@ -828,6 +884,38 @@ int TCP_DeallocatePort(Uint16 Port)
        return 1;
 }
 
+tTCPConnection *TCP_int_CreateConnection(tInterface *Interface, enum eTCPConnectionState State)        // Allocate and initialise a connection in state State on Interface; returns NULL if the allocation fails
+{
+       tTCPConnection  *conn = calloc( 1, sizeof(tTCPConnection) + TCP_WINDOW_SIZE + TCP_WINDOW_SIZE/8 );      // One zeroed block: struct, then TCP_WINDOW_SIZE data bytes, then a one-bit-per-byte bitmap
+       if( !conn )     return NULL;    // Out of memory: report failure instead of dereferencing NULL below
+       conn->State = State;
+       conn->Interface = Interface;
+       conn->LocalPort = -1;
+       conn->RemotePort = -1;
+
+       conn->Node.ReferenceCount = 1;
+       conn->Node.ImplPtr = conn;      // Lets the VFS node handlers recover the connection from the node
+       conn->Node.NumACLs = 1;
+       conn->Node.ACLs = &gVFS_ACL_EveryoneRW;
+       conn->Node.Type = &gTCP_ClientNodeType;
+       conn->Node.BufferFull = 1;      // Cleared when connection opens
+
+       conn->RecievedBuffer = RingBuffer_Create( TCP_RECIEVE_BUFFER_SIZE );
+       #if 0
+       conn->SentBuffer = RingBuffer_Create( TCP_SEND_BUFFER_SIZE );
+       Semaphore_Init(conn->SentBufferSpace, 0, TCP_SEND_BUFFER_SIZE, "TCP SentBuffer", conn->Name);
+       #endif
+       
+       #if CACHE_FUTURE_PACKETS_IN_BYTES
+       // Future received data (ahead of the expected sequence number) lives in the tail of the allocation
+       conn->FuturePacketData = (Uint8*)conn + sizeof(tTCPConnection);
+       conn->FuturePacketValidBytes = conn->FuturePacketData + TCP_WINDOW_SIZE;
+       #endif
+
+       conn->DeferredACKTimer = Time_AllocateTimer( TCP_int_SendDelayedACK, conn);     // Delayed-ACK timer: callback plus this connection as its argument
+       return conn;
+}
+
 // --- Server
 tVFS_Node *TCP_Server_Init(tInterface *Interface)
 {
@@ -868,24 +956,17 @@ tVFS_Node *TCP_Server_Init(tInterface *Interface)
  * \param Node Server node
  * \param Pos  Position (ignored)
  */
-char *TCP_Server_ReadDir(tVFS_Node *Node, int Pos)
+int TCP_Server_ReadDir(tVFS_Node *Node, int Pos, char Dest[FILENAME_MAX])
 {
        tTCPListener    *srv = Node->ImplPtr;
        tTCPConnection  *conn;
-       char    *ret;
        
        ENTER("pNode iPos", Node, Pos);
 
        Log_Log("TCP", "Thread %i waiting for a connection", Threads_GetTID());
-       for(;;)
-       {
-               SHORTLOCK( &srv->lConnections );
-               if( srv->NewConnections != NULL )       break;
-               SHORTREL( &srv->lConnections );
-               Threads_Yield();        // TODO: Sleep until poked
-       }
+       Semaphore_Wait( &srv->WaitingConnections, 1 );
        
-
+       SHORTLOCK(&srv->lConnections);
        // Increment the new list (the current connection is still on the 
        // normal list)
        conn = srv->NewConnections;
@@ -901,11 +982,10 @@ char *TCP_Server_ReadDir(tVFS_Node *Node, int Pos)
        LOG("srv->NewConnections = %p", srv->NewConnections);
        LOG("srv->ConnectionsTail = %p", srv->ConnectionsTail);
 
-       ret = malloc(9);
-       itoa(ret, conn->Node.ImplInt, 16, 8, '0');
-       Log_Log("TCP", "Thread %i got '%s'", Threads_GetTID(), ret);
-       LEAVE('s', ret);
-       return ret;
+       itoa(Dest, conn->Node.ImplInt, 16, 8, '0');
+       Log_Log("TCP", "Thread %i got connection '%s'", Threads_GetTID(), Dest);
+       LEAVE('i', 0);
+       return 0;
 }
 
 /**
@@ -913,7 +993,7 @@ char *TCP_Server_ReadDir(tVFS_Node *Node, int Pos)
  * \param Node Server node
  * \param Name Hexadecimal ID of the node
  */
-tVFS_Node *TCP_Server_FindDir(tVFS_Node *Node, const char *Name)
+tVFS_Node *TCP_Server_FindDir(tVFS_Node *Node, const char *Name, Uint Flags)
 {
        tTCPConnection  *conn;
        tTCPListener    *srv = Node->ImplPtr;
@@ -1028,29 +1108,7 @@ void TCP_Server_Close(tVFS_Node *Node)
  */
 tVFS_Node *TCP_Client_Init(tInterface *Interface)
 {
-       tTCPConnection  *conn = calloc( sizeof(tTCPConnection) + TCP_WINDOW_SIZE + TCP_WINDOW_SIZE/8, 1 );
-
-       conn->State = TCP_ST_CLOSED;
-       conn->Interface = Interface;
-       conn->LocalPort = -1;
-       conn->RemotePort = -1;
-
-       conn->Node.ImplPtr = conn;
-       conn->Node.NumACLs = 1;
-       conn->Node.ACLs = &gVFS_ACL_EveryoneRW;
-       conn->Node.Type = &gTCP_ClientNodeType;
-
-       conn->RecievedBuffer = RingBuffer_Create( TCP_RECIEVE_BUFFER_SIZE );
-       #if 0
-       conn->SentBuffer = RingBuffer_Create( TCP_SEND_BUFFER_SIZE );
-       Semaphore_Init(conn->SentBufferSpace, 0, TCP_SEND_BUFFER_SIZE, "TCP SentBuffer", conn->Name);
-       #endif
-       
-       #if CACHE_FUTURE_PACKETS_IN_BYTES
-       // Future recieved data (ahead of the expected sequence number)
-       conn->FuturePacketData = (Uint8*)conn + sizeof(tTCPConnection);
-       conn->FuturePacketValidBytes = conn->FuturePacketData + TCP_WINDOW_SIZE;
-       #endif
+       tTCPConnection  *conn = TCP_int_CreateConnection(Interface, TCP_ST_CLOSED);
 
        SHORTLOCK(&glTCP_OutbountCons);
        conn->Next = gTCP_OutbountCons;
@@ -1065,7 +1123,7 @@ tVFS_Node *TCP_Client_Init(tInterface *Interface)
  * \note If \a Length is smaller than the size of the packet, the rest
  *       of the packet's data will be discarded.
  */
-Uint64 TCP_Client_Read(tVFS_Node *Node, Uint64 Offset, Uint64 Length, void *Buffer)
+size_t TCP_Client_Read(tVFS_Node *Node, off_t Offset, size_t Length, void *Buffer, Uint Flags)
 {
        tTCPConnection  *conn = Node->ImplPtr;
        size_t  len;
@@ -1073,14 +1131,9 @@ Uint64 TCP_Client_Read(tVFS_Node *Node, Uint64 Offset, Uint64 Length, void *Buff
        ENTER("pNode XOffset XLength pBuffer", Node, Offset, Length, Buffer);
        LOG("conn = %p {State:%i}", conn, conn->State);
        
-       // Check if connection is estabilishing
-       // - TODO: Sleep instead (maybe using VFS_SelectNode to wait for the
-       //   data to be availiable
-       while( conn->State == TCP_ST_SYN_RCVD || conn->State == TCP_ST_SYN_SENT )
-               Threads_Yield();
-       
-       // If the conneciton is not open, then clean out the recieved buffer
-       if( conn->State != TCP_ST_OPEN )
+       // If the connection has been closed (state > ST_OPEN) then clear
+       // any stale data in the buffer (until it is empty)
+       if( conn->State > TCP_ST_OPEN )
        {
                Mutex_Acquire( &conn->lRecievedPackets );
                len = RingBuffer_Read( Buffer, conn->RecievedBuffer, Length );
@@ -1088,6 +1141,7 @@ Uint64 TCP_Client_Read(tVFS_Node *Node, Uint64 Offset, Uint64 Length, void *Buff
                
                if( len == 0 ) {
                        VFS_MarkAvaliable(Node, 0);
+                       errno = 0;
                        LEAVE('i', -1);
                        return -1;
                }
@@ -1097,7 +1151,17 @@ Uint64 TCP_Client_Read(tVFS_Node *Node, Uint64 Offset, Uint64 Length, void *Buff
        }
        
        // Wait
-       VFS_SelectNode(Node, VFS_SELECT_READ|VFS_SELECT_ERROR, NULL, "TCP_Client_Read");
+       {
+               tTime   *timeout = NULL;
+               tTime   timeout_zero = 0;
+               if( Flags & VFS_IOFLAG_NOBLOCK )
+                       timeout = &timeout_zero;
+               if( !VFS_SelectNode(Node, VFS_SELECT_READ|VFS_SELECT_ERROR, timeout, "TCP_Client_Read") ) {
+                       errno = EWOULDBLOCK;
+                       LEAVE('i', -1);
+                       return -1;
+               }
+       }
        
        // Lock list and read as much as possible (up to `Length`)
        Mutex_Acquire( &conn->lRecievedPackets );
@@ -1122,6 +1186,9 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
 {
        char    buf[sizeof(tTCPHeader)+Length];
        tTCPHeader      *packet = (void*)buf;
+
+       // - Stop Delayed ACK timer (as this data packet ACKs)
+       Time_RemoveTimer(Connection->DeferredACKTimer);
        
        packet->SourcePort = htons(Connection->LocalPort);
        packet->DestPort = htons(Connection->RemotePort);
@@ -1131,6 +1198,7 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
        packet->AcknowlegementNumber = htonl(Connection->NextSequenceRcv);
        packet->SequenceNumber = htonl(Connection->NextSequenceSend);
        packet->Flags = TCP_FLAG_PSH|TCP_FLAG_ACK;      // Hey, ACK if you can!
+       packet->UrgentPointer = 0;
        
        memcpy(packet->Options, Data, Length);
        
@@ -1139,7 +1207,7 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
        Debug_HexDump("TCP_INT_SendDataPacket: Data = ", Data, Length);
 #endif
        
-       TCP_SendPacket( Connection, sizeof(tTCPHeader)+Length, packet );
+       TCP_SendPacket( Connection, packet, Length, Data );
        
        Connection->NextSequenceSend += Length;
 }
@@ -1147,7 +1215,7 @@ void TCP_INT_SendDataPacket(tTCPConnection *Connection, size_t Length, const voi
 /**
  * \brief Send some bytes on a connection
  */
-Uint64 TCP_Client_Write(tVFS_Node *Node, Uint64 Offset, Uint64 Length, const void *Buffer)
+size_t TCP_Client_Write(tVFS_Node *Node, off_t Offset, size_t Length, const void *Buffer, Uint Flags)
 {
        tTCPConnection  *conn = Node->ImplPtr;
        size_t  rem = Length;
@@ -1159,16 +1227,27 @@ Uint64 TCP_Client_Write(tVFS_Node *Node, Uint64 Offset, Uint64 Length, const voi
 //             Buffer, Length);
 //     #endif
        
-       // Check if connection is open
-       while( conn->State == TCP_ST_SYN_RCVD || conn->State == TCP_ST_SYN_SENT )
-               Threads_Yield();
-       
-       if( conn->State != TCP_ST_OPEN ) {
+       // Don't allow a write to a closed connection
+       if( conn->State > TCP_ST_OPEN ) {
                VFS_MarkError(Node, 1);
+               errno = 0;
                LEAVE('i', -1);
                return -1;
        }
        
+       // Wait
+       {
+               tTime   *timeout = NULL;
+               tTime   timeout_zero = 0;
+               if( Flags & VFS_IOFLAG_NOBLOCK )
+                       timeout = &timeout_zero;
+               if( !VFS_SelectNode(Node, VFS_SELECT_WRITE|VFS_SELECT_ERROR, timeout, "TCP_Client_Write") ) {
+                       errno = EWOULDBLOCK;
+                       LEAVE('i', -1);
+                       return -1;
+               }
+       }
+       
        do
        {
                 int    len = (rem < TCP_MAX_PACKET_SIZE) ? rem : TCP_MAX_PACKET_SIZE;
@@ -1211,7 +1290,7 @@ void TCP_StartConnection(tTCPConnection *Conn)
        hdr.WindowSize = htons(TCP_WINDOW_SIZE);        // Max
        hdr.Checksum = 0;       // TODO
        
-       TCP_SendPacket( Conn, sizeof(tTCPHeader), &hdr );
+       TCP_SendPacket( Conn, &hdr, 0, NULL );
        
        Conn->NextSequenceSend ++;
        Conn->State = TCP_ST_SYN_SENT;
@@ -1273,13 +1352,10 @@ int TCP_Client_IOCtl(tVFS_Node *Node, int ID, void *Data)
                        LEAVE_RET('i', 0);
 
                {
-                       tTime   timeout_end = now() + conn->Interface->TimeoutDelay;
+                       tTime   timeout = conn->Interface->TimeoutDelay;
        
                        TCP_StartConnection(conn);
-                       // TODO: Wait for connection to open
-                       while( conn->State == TCP_ST_SYN_SENT && timeout_end > now() ) {
-                               Threads_Yield();
-                       }
+                       VFS_SelectNode(&conn->Node, VFS_SELECT_WRITE, &timeout, "TCP Connection");
                        if( conn->State == TCP_ST_SYN_SENT )
                                LEAVE_RET('i', 0);
                }
@@ -1301,6 +1377,15 @@ void TCP_Client_Close(tVFS_Node *Node)
        
        ENTER("pNode", Node);
        
+       ASSERT(Node->ReferenceCount != 0);
+
+       if( Node->ReferenceCount > 1 ) {
+               Node->ReferenceCount --;
+               LOG("Dereference only");
+               LEAVE('-');
+               return ;
+       }
+       
        if( conn->State == TCP_ST_CLOSE_WAIT || conn->State == TCP_ST_OPEN )
        {
                packet.SourcePort = htons(conn->LocalPort);
@@ -1312,11 +1397,14 @@ void TCP_Client_Close(tVFS_Node *Node)
                packet.SequenceNumber = htonl(conn->NextSequenceSend);
                packet.Flags = TCP_FLAG_FIN;
                
-               TCP_SendPacket( conn, sizeof(tTCPHeader), &packet );
+               TCP_SendPacket( conn, &packet, 0, NULL );
        }
        
        switch( conn->State )
        {
+       case TCP_ST_CLOSED:
+               Log_Warning("TCP", "Closing connection that was never opened");
+               break;
        case TCP_ST_CLOSE_WAIT:
                conn->State = TCP_ST_LAST_ACK;
                break;
@@ -1325,10 +1413,13 @@ void TCP_Client_Close(tVFS_Node *Node)
                while( conn->State == TCP_ST_FIN_WAIT1 )        Threads_Yield();
                break;
        default:
-               Log_Warning("TCP", "Unhandled connection state in TCP_Client_Close");
+               Log_Warning("TCP", "Unhandled connection state %i in TCP_Client_Close",
+                       conn->State);
                break;
        }
        
+       Time_RemoveTimer(conn->DeferredACKTimer);
+       Time_FreeTimer(conn->DeferredACKTimer);
        free(conn);
        
        LEAVE('-');

UCC git Repository :: git.ucc.asn.au