aboutsummaryrefslogtreecommitdiff
path: root/helper/chksum.c
diff options
context:
space:
mode:
authorBarry Spinney <spinney@mellanox.com>2016-04-28 22:35:41 -0500
committerMaxim Uvarov <maxim.uvarov@linaro.org>2016-04-29 11:47:51 +0300
commit0e1645087dea2c7744b2384e8c4d790d79e3125e (patch)
tree286e7c4bd3112df6e4175c2c242321de5842b641 /helper/chksum.c
parent166859a5380cd156f65a2143216d04e17399b6dc (diff)
helper: add a more complete and correct checksum implementation
This patch adds a file called chksum.c which implements a complete TCP/UDP over either IPv4 or IPv6 checksum generation / verification capability. In addition it can deal with any form of packet segmentation including tiny segments, segments not aligned to a 2 byte boundary, etc. Signed-off-by: Barry Spinney <spinney@mellanox.com> Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org> Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
Diffstat (limited to 'helper/chksum.c')
-rw-r--r--helper/chksum.c342
1 files changed, 342 insertions, 0 deletions
diff --git a/helper/chksum.c b/helper/chksum.c
new file mode 100644
index 000000000..859d1ec96
--- /dev/null
+++ b/helper/chksum.c
@@ -0,0 +1,342 @@
+/* Copyright (c) 2016, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <odp.h>
+#include <odp/helper/ip.h>
+#include <odp/helper/udp.h>
+#include <odp/helper/tcp.h>
+#include <odp/helper/chksum.h>
+
+/* The following union type is used to "view" an ordered set of bytes (either
+ * 2 or 4) as 1 or 2 16-bit quantities - using host endian order.
+ *
+ * Usage pattern in this file: bytes are stored into bytes[] in the order in
+ * which they appear in the packet and are then read back through words16[],
+ * which yields the same value a 16-bit load from the packet data would. */
+typedef union {
+ uint16_t words16[2];
+ uint8_t bytes[4];
+} swap_buf_t;
+
+/* Two zero bytes. Copied over the checksum field of the packet when a checksum
+ * is being generated and the L4 header is not contiguous in one segment. */
+static uint8_t ZEROS[2] = { 0, 0 };
+
+/* Returns the plain 32-bit sum (no carry folding) of one contiguous chunk of
+ * data, taken as 16-bit integers in host endian order.
+ *
+ * data8_ptr        First byte of the chunk.
+ * data_len         Length of the chunk in bytes. A length of 0 is tolerated.
+ * is_last          True when this chunk ends the data being checksummed. A
+ *                  trailing odd byte is then padded with a zero byte and
+ *                  added to the sum.
+ * has_odd_byte_in  True when the previous chunk left a single unpaired byte
+ *                  in *odd_byte_in_out. That byte is paired with the first
+ *                  byte of this chunk.
+ * odd_byte_in_out  In: the pending byte (only read when has_odd_byte_in is
+ *                  true). Out: the trailing unpaired byte of this chunk,
+ *                  written only when one exists and is_last is false.
+ *
+ * The caller is responsible for tracking whether an odd byte is pending; this
+ * function only stores its value.
+ *
+ * Every 16-bit word is assembled through swap_buf.bytes[] instead of being
+ * loaded through a cast uint16_t pointer. A chunk may start at an odd address
+ * (and consuming a pending odd byte flips the parity of the pointer), so a
+ * direct 16-bit load could be misaligned. Compilers typically turn the byte
+ * pair copy back into a single load where the target allows it. */
+
+static uint32_t data_seg_sum(uint8_t *data8_ptr,
+			     uint32_t data_len, /* length in bytes */
+			     odp_bool_t is_last,
+			     odp_bool_t has_odd_byte_in,
+			     uint8_t *odd_byte_in_out)
+{
+	swap_buf_t swap_buf;
+	uint32_t sum, num_words, idx;
+
+	sum = 0;
+
+	/* An empty chunk contributes nothing, except that a pending odd byte
+	 * must still be flushed (zero padded) if this is the end of the data.
+	 * Handling this up front also keeps data_len from wrapping below. */
+	if (data_len == 0) {
+		if (has_odd_byte_in && is_last) {
+			swap_buf.bytes[0] = *odd_byte_in_out;
+			swap_buf.bytes[1] = 0;
+			sum = (uint32_t)swap_buf.words16[0];
+		}
+
+		return sum;
+	}
+
+	/* Pair the byte left over from the previous chunk with our first. */
+	if (has_odd_byte_in) {
+		swap_buf.bytes[0] = *odd_byte_in_out;
+		swap_buf.bytes[1] = *data8_ptr++;
+		sum += (uint32_t)swap_buf.words16[0];
+		data_len--;
+	}
+
+	/* Sum all complete 16-bit words, two bytes at a time. */
+	num_words = data_len / 2;
+	for (idx = 0; idx < num_words; idx++) {
+		swap_buf.bytes[0] = data8_ptr[0];
+		swap_buf.bytes[1] = data8_ptr[1];
+		sum += (uint32_t)swap_buf.words16[0];
+		data8_ptr += 2;
+	}
+
+	if ((data_len & 1) == 0)
+		return sum;
+
+	/* Now handle the case of a single odd byte. */
+	if (is_last) {
+		swap_buf.bytes[0] = *data8_ptr;
+		swap_buf.bytes[1] = 0;
+		sum += (uint32_t)swap_buf.words16[0];
+	} else {
+		*odd_byte_in_out = *data8_ptr;
+	}
+
+	return sum;
+}
+
+/* Parses the UDP or TCP header of odp_pkt and prepares the packet for the
+ * checksum calculation.
+ *
+ * odp_pkt                Packet to inspect.
+ * op                     Requested checksum operation.
+ * chksum_ptr             The caller's result pointer; only tested for NULL.
+ * l4_len_ptr             Out: UDP length field (host order), 0 for TCP.
+ * split_l4_hdr_ptr       Out: true when the L4 header is not contiguous in
+ *                        the first segment and was parsed from a local copy.
+ * is_tcp_ptr             Out: true for TCP, false for UDP.
+ * pkt_chksum_offset_ptr  Out: packet offset of the 16-bit checksum field.
+ * pkt_chksum_ptr_ptr     Out: pointer to the checksum field.
+ *
+ * NOTE(review): when *split_l4_hdr_ptr is true the pointer stored through
+ * pkt_chksum_ptr_ptr refers to a local header copy of this function and is
+ * not valid once the function has returned. Callers should then access the
+ * field through *pkt_chksum_offset_ptr instead.
+ *
+ * Returns 0 on success (all outputs written), 1 when op is
+ * ODPH_CHKSUM_VERIFY on a UDP packet whose checksum field is 0 and chksum_ptr
+ * is NULL (nothing to do, outputs not written), and -1 when the packet is
+ * neither UDP nor TCP or its L4 header cannot be accessed. */
+static inline int odph_process_l4_hdr(odp_packet_t odp_pkt,
+				      odph_chksum_op_t op,
+				      uint16_t *chksum_ptr,
+				      uint32_t *l4_len_ptr,
+				      odp_bool_t *split_l4_hdr_ptr,
+				      odp_bool_t *is_tcp_ptr,
+				      uint32_t *pkt_chksum_offset_ptr,
+				      uint16_t **pkt_chksum_ptr_ptr)
+{
+	odph_udphdr_t *udp_hdr_ptr, udp_hdr;
+	odph_tcphdr_t *tcp_hdr_ptr, tcp_hdr;
+	odp_bool_t split_l4_hdr, is_tcp;
+	uint32_t l4_offset, l4_len, hdr_len, pkt_chksum_offset;
+	uint16_t *pkt_chksum_ptr;
+	uint8_t *l4_ptr;
+
+	/* Parse the TCP/UDP header. */
+	l4_offset = odp_packet_l4_offset(odp_pkt);
+	hdr_len = 0;
+	l4_ptr = odp_packet_l4_ptr(odp_pkt, &hdr_len);
+	if (l4_ptr == NULL)
+		return -1;
+
+	pkt_chksum_offset = l4_offset;
+	l4_len = 0;
+	split_l4_hdr = false;
+	is_tcp = false;
+
+	if (odp_packet_has_udp(odp_pkt)) {
+		udp_hdr_ptr = (odph_udphdr_t *)l4_ptr;
+		split_l4_hdr = hdr_len < ODPH_UDPHDR_LEN;
+		if (split_l4_hdr) {
+			/* A failed copy means the packet is too short to
+			 * hold the header; do not parse garbage. */
+			if (odp_packet_copy_to_mem(odp_pkt, l4_offset,
+						   ODPH_UDPHDR_LEN,
+						   &udp_hdr) != 0)
+				return -1;
+			udp_hdr_ptr = &udp_hdr;
+		}
+
+		/* According to the spec's the l4_len to be used for UDP pkts
+		 * should come from the udp header, unlike for TCP where is
+		 * derived. */
+		l4_len = odp_be_to_cpu_16(udp_hdr_ptr->length);
+		pkt_chksum_ptr = &udp_hdr_ptr->chksum;
+		pkt_chksum_offset = l4_offset + offsetof(odph_udphdr_t, chksum);
+	} else if (odp_packet_has_tcp(odp_pkt)) {
+		tcp_hdr_ptr = (odph_tcphdr_t *)l4_ptr;
+		split_l4_hdr = hdr_len < ODPH_TCPHDR_LEN;
+		if (split_l4_hdr) {
+			if (odp_packet_copy_to_mem(odp_pkt, l4_offset,
+						   ODPH_TCPHDR_LEN,
+						   &tcp_hdr) != 0)
+				return -1;
+			tcp_hdr_ptr = &tcp_hdr;
+		}
+
+		pkt_chksum_ptr = &tcp_hdr_ptr->cksm;
+		pkt_chksum_offset = l4_offset + offsetof(odph_tcphdr_t, cksm);
+		is_tcp = true;
+	} else {
+		return -1;
+	}
+
+	/* Note that if the op is ODPH_CHKSUM_VERIFY and the existing
+	 * chksum field is 0 and this is a UDP pkt and the chksum_ptr is NULL
+	 * then skip the rest of the chksum calculation, returning 1 instead. */
+	if ((op == ODPH_CHKSUM_VERIFY) && (*pkt_chksum_ptr == 0) &&
+	    (!is_tcp) && (chksum_ptr == NULL))
+		return 1;
+
+	/* If we are doing a ODPH_CHKSUM_GENERATE op, then make sure that the
+	 * existing chksum field has been set to zeros. With a split header
+	 * pkt_chksum_ptr only addresses the local copy, so the packet itself
+	 * has to be updated through its offset. */
+	if ((op == ODPH_CHKSUM_GENERATE) && (*pkt_chksum_ptr != 0)) {
+		if (split_l4_hdr) {
+			if (odp_packet_copy_from_mem(odp_pkt,
+						     pkt_chksum_offset,
+						     2, ZEROS) != 0)
+				return -1;
+		} else {
+			*pkt_chksum_ptr = 0;
+		}
+	}
+
+	*l4_len_ptr = l4_len;
+	*split_l4_hdr_ptr = split_l4_hdr;
+	*is_tcp_ptr = is_tcp;
+	*pkt_chksum_offset_ptr = pkt_chksum_offset;
+	*pkt_chksum_ptr_ptr = pkt_chksum_ptr;
+	return 0;
+}
+
+/* Parses the IPv4/IPv6 header of odp_pkt and computes the 16-bit sum of the
+ * TCP/UDP pseudo header (source address, destination address, L4 length and
+ * protocol).
+ *
+ * odp_pkt     Packet to inspect.
+ * is_tcp      True when the L4 protocol is TCP, false for UDP.
+ * l4_len_ptr  In: for UDP, the length taken from the UDP header.
+ *             Out: the L4 length to checksum. For TCP this is derived as the
+ *             IP length minus the length of all L3 headers.
+ * sum_ptr     Out: plain 32-bit sum (no carry folding) of the pseudo header
+ *             taken as 16-bit words in host endian order.
+ *
+ * Returns 0 on success and -1 when the packet is neither IPv4 nor IPv6, its
+ * L3 header cannot be accessed, or the lengths in it are inconsistent. */
+
+static inline int odph_process_l3_hdr(odp_packet_t odp_pkt,
+				      odp_bool_t is_tcp,
+				      uint32_t *l4_len_ptr,
+				      uint32_t *sum_ptr)
+{
+	/* Upper-layer protocol numbers as they appear in the pseudo header. */
+	enum { PSEUDO_HDR_PROTO_TCP = 6, PSEUDO_HDR_PROTO_UDP = 17 };
+
+	odph_ipv4hdr_t *ipv4_hdr_ptr, ipv4_hdr;
+	odph_ipv6hdr_t *ipv6_hdr_ptr, ipv6_hdr;
+	swap_buf_t swap_buf;
+	uint32_t l3_offset, l4_offset, l3_hdrs_len, hdr_len, addrs_len;
+	uint32_t protocol, l3_len, l4_len, idx, ipv6_payload_len, sum;
+	const uint8_t *addrs_ptr;
+
+	/* The following computation using the l3 and l4 offsets handles both
+	 * the case of IPv4 options and IPv6 extension headers uniformly. */
+	l3_offset = odp_packet_l3_offset(odp_pkt);
+	l4_offset = odp_packet_l4_offset(odp_pkt);
+	if (l4_offset < l3_offset)
+		return -1;
+	l3_hdrs_len = l4_offset - l3_offset;
+
+	/* Parse the IPv4/IPv6 header. If the fixed part of the header is not
+	 * contiguous in the first segment, work on a local copy instead. */
+	hdr_len = 0;
+	if (odp_packet_has_ipv4(odp_pkt)) {
+		ipv4_hdr_ptr = odp_packet_l3_ptr(odp_pkt, &hdr_len);
+		if (ipv4_hdr_ptr == NULL)
+			return -1;
+
+		if (hdr_len < ODPH_IPV4HDR_LEN) {
+			if (odp_packet_copy_to_mem(odp_pkt, l3_offset,
+						   ODPH_IPV4HDR_LEN,
+						   &ipv4_hdr) != 0)
+				return -1;
+			ipv4_hdr_ptr = &ipv4_hdr;
+		}
+
+		addrs_ptr = (const uint8_t *)&ipv4_hdr_ptr->src_addr;
+		addrs_len = 2 * ODPH_IPV4ADDR_LEN;
+		protocol = ipv4_hdr_ptr->proto;
+		l3_len = odp_be_to_cpu_16(ipv4_hdr_ptr->tot_len);
+	} else if (odp_packet_has_ipv6(odp_pkt)) {
+		ipv6_hdr_ptr = odp_packet_l3_ptr(odp_pkt, &hdr_len);
+		if (ipv6_hdr_ptr == NULL)
+			return -1;
+
+		if (hdr_len < ODPH_IPV6HDR_LEN) {
+			if (odp_packet_copy_to_mem(odp_pkt, l3_offset,
+						   ODPH_IPV6HDR_LEN,
+						   &ipv6_hdr) != 0)
+				return -1;
+			ipv6_hdr_ptr = &ipv6_hdr;
+		}
+
+		addrs_ptr = (const uint8_t *)&ipv6_hdr_ptr->src_addr;
+		addrs_len = 2 * ODPH_IPV6ADDR_LEN;
+
+		/* The pseudo header carries the upper-layer protocol. The
+		 * next_hdr field of the base IPv6 header names the first
+		 * extension header when extension headers are present, so it
+		 * cannot be used here. */
+		protocol = is_tcp ? PSEUDO_HDR_PROTO_TCP :
+				    PSEUDO_HDR_PROTO_UDP;
+		ipv6_payload_len = odp_be_to_cpu_16(ipv6_hdr_ptr->payload_len);
+		l3_len = ipv6_payload_len + ODPH_IPV6HDR_LEN;
+	} else {
+		return -1;
+	}
+
+	/* For UDP pkts, must use the incoming l4_len taken from the udp header.
+	 * For tcp pkts the l4_len is derived from the l3_len and l3_hdrs_len
+	 * calculated above. Reject an IP length smaller than the L3 headers,
+	 * which would otherwise wrap around to a huge L4 length. */
+	if (is_tcp) {
+		if (l3_len < l3_hdrs_len)
+			return -1;
+		l4_len = l3_len - l3_hdrs_len;
+	} else {
+		l4_len = *l4_len_ptr;
+	}
+
+	/* Do a one's complement addition over the IP pseudo-header.
+	 * Note that the pseudo-header is different for IPv4 and IPv6.
+	 * The source and destination addresses are adjacent in the header and
+	 * are summed as one run of 16-bit words. Each word is assembled from
+	 * its two bytes so that no 16-bit load from a possibly misaligned
+	 * header address is needed. */
+	sum = 0;
+	for (idx = 0; idx < addrs_len; idx += 2) {
+		swap_buf.bytes[0] = addrs_ptr[idx];
+		swap_buf.bytes[1] = addrs_ptr[idx + 1];
+		sum += (uint32_t)swap_buf.words16[0];
+	}
+
+	/* Need to convert l4_len and protocol into endian independent form */
+	swap_buf.bytes[0] = (l4_len >> 8) & 0xFF;
+	swap_buf.bytes[1] = (l4_len >> 0) & 0xFF;
+	swap_buf.bytes[2] = 0;
+	swap_buf.bytes[3] = (uint8_t)protocol;
+
+	sum += (uint32_t)swap_buf.words16[0] + (uint32_t)swap_buf.words16[1];
+
+	*l4_len_ptr = l4_len;
+	*sum_ptr = sum;
+	return 0;
+}
+
+/* Generates or verifies the UDP/TCP checksum of an IPv4 or IPv6 packet whose
+ * L4 data may be spread over any number of segments of any length.
+ *
+ * odp_pkt     Packet to process.
+ * op          ODPH_CHKSUM_GENERATE stores the calculated checksum in the
+ *             packet; ODPH_CHKSUM_VERIFY checks the checksum already in the
+ *             packet; any other op only calculates the value.
+ * chksum_ptr  If not NULL, receives the calculated 16-bit value.
+ *
+ * Returns 0 on success; a negative value when the packet is not UDP/TCP over
+ * IPv4/IPv6 or its headers/data cannot be accessed; for ODPH_CHKSUM_VERIFY
+ * additionally 1 when a UDP packet carries a checksum field of 0 (nothing to
+ * verify) and 2 when the checksum does not match.
+ *
+ * Note that this implementation does not include any code or conditionally
+ * modified code that is endian specific, yet it works equally well on BIG or
+ * LITTLE endian machines. The reason that this works is primarily because
+ * a 16-bit one's complement sum happens to be "endian-agnostic". Specifically
+ * if one does a sum of 16-bit pkt values on a big endian machine and then on
+ * a little endian machine, they will not agree. But after turning it into
+ * a one's complement sum by adding the carry bits in and truncating to
+ * 16-bits (which may need to be done more than once), the final 16-bit results
+ * will be byte-swapped versions of the other. Then after storing the result
+ * back into the pkt (as a 16-bit value), the final byte pattern will be
+ * identical for both machines. */
+
+int odph_udp_tcp_chksum(odp_packet_t odp_pkt,
+			odph_chksum_op_t op,
+			uint16_t *chksum_ptr)
+{
+	odp_bool_t split_l4_hdr, is_tcp, is_last;
+	odp_bool_t has_odd_byte_in;
+	uint32_t l4_len, sum, ones_compl_sum, remaining_seg_len, data_len;
+	uint32_t pkt_chksum_offset, offset;
+	uint16_t *pkt_chksum_ptr, chksum, pkt_chksum;
+	uint8_t *data_ptr, odd_byte_in_out;
+	int rc, ret_code;
+
+	/* First parse and process the l4 header. The checksum field is
+	 * accessed below only through pkt_chksum_offset: pkt_chksum_ptr is
+	 * not usable when the header is split across segments. */
+	rc = odph_process_l4_hdr(odp_pkt, op, chksum_ptr, &l4_len,
+				 &split_l4_hdr, &is_tcp, &pkt_chksum_offset,
+				 &pkt_chksum_ptr);
+	if (rc != 0)
+		return rc;
+
+	/* Note that in addition to parsing the l3 header, this function
+	 * does the sum of the pseudo header. */
+	rc = odph_process_l3_hdr(odp_pkt, is_tcp, &l4_len, &sum);
+	if (rc != 0)
+		return rc;
+
+	/* The following code handles all of the different cases where the
+	 * data to be checksummed might be split among an arbitrary number of
+	 * segments, each of an arbitrary length (include odd alignments!). */
+	remaining_seg_len = 0;
+	data_ptr = odp_packet_l4_ptr(odp_pkt, &remaining_seg_len);
+	offset = odp_packet_l4_offset(odp_pkt);
+	has_odd_byte_in = false;
+	odd_byte_in_out = 0;
+
+	while (l4_len != 0) {
+		/* The headers claim more L4 data than the packet holds. */
+		if (data_ptr == NULL || remaining_seg_len == 0)
+			return -1;
+
+		/* This is the final chunk whenever the rest of the L4 data
+		 * fits into the current segment - including the case where
+		 * the segment holds further bytes (padding, trailer) that
+		 * are not part of the L4 data. */
+		is_last = (l4_len <= remaining_seg_len);
+		data_len = is_last ? l4_len : remaining_seg_len;
+
+		sum += data_seg_sum(data_ptr, data_len, is_last,
+				    has_odd_byte_in, &odd_byte_in_out);
+		l4_len -= data_len;
+		if (l4_len == 0)
+			break;
+
+		/* An odd sized chunk flips whether a byte is left pending. */
+		if (data_len & 1)
+			has_odd_byte_in = !has_odd_byte_in;
+
+		offset += data_len;
+		data_ptr = odp_packet_offset(odp_pkt, offset,
+					     &remaining_seg_len, NULL);
+	}
+
+	/* Now do the one's complement "carry" algorithm. Up until now this
+	 * has just been regular two's complement addition. Note that it is
+	 * important that this regular sum of 16-bit quantities be done with
+	 * at least 32-bit arithmetic to prevent the loss of the carries.
+	 * Note that it can be proven that only two rounds of the carry
+	 * wrap around logic are necessary (assuming 32-bit arithmetic and
+	 * a data length of < 64K). */
+	ones_compl_sum = (sum & 0xFFFF) + (sum >> 16);
+	ones_compl_sum = (ones_compl_sum & 0xFFFF) + (ones_compl_sum >> 16);
+	chksum = (~ones_compl_sum) & 0xFFFF;
+	ret_code = 0;
+
+	/* Now based upon the given op, the calculated chksum and the incoming
+	 * chksum value complete the operation. */
+	if (op == ODPH_CHKSUM_GENERATE) {
+		/* A UDP checksum field of 0 means "no checksum", so a
+		 * calculated value of 0 is sent as all ones, which is the
+		 * same value in one's complement arithmetic. */
+		if (!is_tcp && chksum == 0)
+			chksum = 0xFFFF;
+
+		if (odp_packet_copy_from_mem(odp_pkt, pkt_chksum_offset,
+					     sizeof(chksum), &chksum) != 0)
+			return -1;
+	} else if (op == ODPH_CHKSUM_VERIFY) {
+		if (odp_packet_copy_to_mem(odp_pkt, pkt_chksum_offset,
+					   sizeof(pkt_chksum),
+					   &pkt_chksum) != 0)
+			return -1;
+
+		if ((pkt_chksum == 0) && (!is_tcp))
+			ret_code = 1;
+		else
+			ret_code = (chksum == 0) ? 0 : 2;
+	}
+
+	if (chksum_ptr != NULL)
+		*chksum_ptr = chksum;
+
+	return ret_code;
+}