summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-09-30 08:50:57 +0300
committerDavid S. Miller <davem@davemloft.net>2016-09-30 08:50:57 +0300
commitbcdc6efabda3ba6a67f4cb8915873e7d6759b7e6 (patch)
treed977daa27ad975a6acbc313ec792592f4cf50847 /include
parentfa1403548daf3a2c8c988f89db1053df70200405 (diff)
parent6d4a741cbbfa6612a479656654ca5edf7becc72c (diff)
downloadlinux-bcdc6efabda3ba6a67f4cb8915873e7d6759b7e6.tar.xz
Merge branch 'net_proc_perf'
Jia He says: ==================== Reduce cache miss for snmp_fold_field In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which would cause high cache miss rate. test source code: ================ My simple test case, which read from the procfs items endlessly: /***********************************************************/ int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0xffffffff;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**********************************************************/ compile and run: ================ gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: ==================== Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses # 0.11% of all LL-cache hits [40.02%] 6.245162638 seconds time elapsed After the patch set: =================== Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses # 2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses # 0.25% of all LL-cache hits [39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% changelog ========= v6: - correct v5 v5: - order local variables from longest to shortest line v4: - move memset into one block of if statement in snmp6_seq_show_item - remove the changes in netstat_seq_show considerred the stack usage is too large v3: - introduce generic interface (suggested by Marcelo Ricardo Leitner) - use max_t instead of self defined macro (suggested by David Miller) v2: - fix bug in udplite statistics. - snmp_seq_show is split into 2 parts ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/ip.h23
1 files changed, 23 insertions, 0 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index 9742b92dc933..bc43c0fcae12 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
}
#endif
+#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \
+{ \
+ int i, c; \
+ for_each_possible_cpu(c) { \
+ for (i = 0; stats_list[i].name; i++) \
+ buff64[i] += snmp_get_cpu_field64( \
+ mib_statistic, \
+ c, stats_list[i].entry, \
+ offset); \
+ } \
+}
+
+#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \
+{ \
+ int i, c; \
+ for_each_possible_cpu(c) { \
+ for (i = 0; stats_list[i].name; i++) \
+ buff[i] += snmp_get_cpu_field( \
+ mib_statistic, \
+ c, stats_list[i].entry); \
+ } \
+}
+
void inet_get_local_port_range(struct net *net, int *low, int *high);
#ifdef CONFIG_SYSCTL