// SPDX-License-Identifier: GPL-2.0
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */
| 8 |  | 
|---|
#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
#include <asm/word-at-a-time.h>
| 13 |  | 
|---|
| 14 | static inline __wsum csum_finalize_sum(u64 temp64) | 
|---|
| 15 | { | 
|---|
| 16 | return (__force __wsum)((temp64 + ror64(word: temp64, shift: 32)) >> 32); | 
|---|
| 17 | } | 
|---|
| 18 |  | 
|---|
/*
 * Add five 64-bit words (40 bytes) from @m into the running checksum @sum.
 *
 * The addq/adcq chain accumulates with carry across all five words, and
 * the final "adcq $0" folds the last carry back into the result so no
 * carry is ever lost.  "m" constraints let the loads fold into the adds.
 */
static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5])
{
	asm( "addq %1,%0\n\t"
	     "adcq %2,%0\n\t"
	     "adcq %3,%0\n\t"
	     "adcq %4,%0\n\t"
	     "adcq %5,%0\n\t"
	     "adcq $0,%0"
	    : "+r"(sum)
	    : "m"(m[0]), "m"(m[1]), "m"(m[2]),
	      "m"(m[3]), "m"(m[4]));
	return sum;
}
| 32 |  | 
|---|
| 33 | /* | 
|---|
| 34 | * Do a checksum on an arbitrary memory area. | 
|---|
| 35 | * Returns a 32bit checksum. | 
|---|
| 36 | * | 
|---|
| 37 | * This isn't as time critical as it used to be because many NICs | 
|---|
| 38 | * do hardware checksumming these days. | 
|---|
| 39 | * | 
|---|
| 40 | * Still, with CHECKSUM_COMPLETE this is called to compute | 
|---|
| 41 | * checksums on IPv6 headers (40 bytes) and other small parts. | 
|---|
| 42 | * it's best to have buff aligned on a 64-bit boundary | 
|---|
| 43 | */ | 
|---|
| 44 | __wsum csum_partial(const void *buff, int len, __wsum sum) | 
|---|
| 45 | { | 
|---|
| 46 | u64 temp64 = (__force u64)sum; | 
|---|
| 47 |  | 
|---|
| 48 | /* Do two 40-byte chunks in parallel to get better ILP */ | 
|---|
| 49 | if (likely(len >= 80)) { | 
|---|
| 50 | u64 temp64_2 = 0; | 
|---|
| 51 | do { | 
|---|
| 52 | temp64 = update_csum_40b(sum: temp64, m: buff); | 
|---|
| 53 | temp64_2 = update_csum_40b(sum: temp64_2, m: buff + 40); | 
|---|
| 54 | buff += 80; | 
|---|
| 55 | len -= 80; | 
|---|
| 56 | } while (len >= 80); | 
|---|
| 57 |  | 
|---|
| 58 | asm( "addq %1,%0\n\t" | 
|---|
| 59 | "adcq $0,%0" | 
|---|
| 60 | : "+r"(temp64): "r"(temp64_2)); | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | /* | 
|---|
| 64 | * len == 40 is the hot case due to IPv6 headers, so return | 
|---|
| 65 | * early for that exact case without checking the tail bytes. | 
|---|
| 66 | */ | 
|---|
| 67 | if (len >= 40) { | 
|---|
| 68 | temp64 = update_csum_40b(sum: temp64, m: buff); | 
|---|
| 69 | len -= 40; | 
|---|
| 70 | if (!len) | 
|---|
| 71 | return csum_finalize_sum(temp64); | 
|---|
| 72 | buff += 40; | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | if (len & 32) { | 
|---|
| 76 | asm( "addq 0*8(%[src]),%[res]\n\t" | 
|---|
| 77 | "adcq 1*8(%[src]),%[res]\n\t" | 
|---|
| 78 | "adcq 2*8(%[src]),%[res]\n\t" | 
|---|
| 79 | "adcq 3*8(%[src]),%[res]\n\t" | 
|---|
| 80 | "adcq $0,%[res]" | 
|---|
| 81 | : [res] "+r"(temp64) | 
|---|
| 82 | : [src] "r"(buff), "m"(*(const char(*)[32])buff)); | 
|---|
| 83 | buff += 32; | 
|---|
| 84 | } | 
|---|
| 85 | if (len & 16) { | 
|---|
| 86 | asm( "addq 0*8(%[src]),%[res]\n\t" | 
|---|
| 87 | "adcq 1*8(%[src]),%[res]\n\t" | 
|---|
| 88 | "adcq $0,%[res]" | 
|---|
| 89 | : [res] "+r"(temp64) | 
|---|
| 90 | : [src] "r"(buff), "m"(*(const char(*)[16])buff)); | 
|---|
| 91 | buff += 16; | 
|---|
| 92 | } | 
|---|
| 93 | if (len & 8) { | 
|---|
| 94 | asm( "addq 0*8(%[src]),%[res]\n\t" | 
|---|
| 95 | "adcq $0,%[res]" | 
|---|
| 96 | : [res] "+r"(temp64) | 
|---|
| 97 | : [src] "r"(buff), "m"(*(const char(*)[8])buff)); | 
|---|
| 98 | buff += 8; | 
|---|
| 99 | } | 
|---|
| 100 | if (len & 7) { | 
|---|
| 101 | unsigned int shift = (-len << 3) & 63; | 
|---|
| 102 | unsigned long trail; | 
|---|
| 103 |  | 
|---|
| 104 | trail = (load_unaligned_zeropad(addr: buff) << shift) >> shift; | 
|---|
| 105 |  | 
|---|
| 106 | asm( "addq %[trail],%[res]\n\t" | 
|---|
| 107 | "adcq $0,%[res]" | 
|---|
| 108 | : [res] "+r"(temp64) | 
|---|
| 109 | : [trail] "r"(trail)); | 
|---|
| 110 | } | 
|---|
| 111 | return csum_finalize_sum(temp64); | 
|---|
| 112 | } | 
|---|
| 113 | EXPORT_SYMBOL(csum_partial); | 
|---|
| 114 |  | 
|---|
| 115 | /* | 
|---|
| 116 | * this routine is used for miscellaneous IP-like checksums, mainly | 
|---|
| 117 | * in icmp.c | 
|---|
| 118 | */ | 
|---|
| 119 | __sum16 ip_compute_csum(const void *buff, int len) | 
|---|
| 120 | { | 
|---|
| 121 | return csum_fold(sum: csum_partial(buff, len, 0)); | 
|---|
| 122 | } | 
|---|
| 123 | EXPORT_SYMBOL(ip_compute_csum); | 
|---|
| 124 |  | 
|---|