/*
 * Copyright (c) 2007-2011 Nicira Networks.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#ifndef TUNNEL_H
#define TUNNEL_H 1

#include <linux/version.h>

#include "flow.h"
#include "openvswitch/tunnel.h"
#include "vport.h"

/*
 * The absolute minimum fragment size.  Note that there are many other
 * definitions of the minimum MTU.
 */
#define IP_MIN_MTU 68

/*
 * One of these goes in struct tnl_ops and in ovs_tnl_find_port().
 * These values are in the same namespace as other TNL_T_* values, so
 * only the least significant 10 bits are available to define protocol
 * identifiers.
 */
#define TNL_T_PROTO_GRE		0
#define TNL_T_PROTO_CAPWAP	1

/* These flags are only needed when calling ovs_tnl_find_port(). */
#define TNL_T_KEY_EXACT		(1 << 10)
#define TNL_T_KEY_MATCH		(1 << 11)
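
/*
 * For example, a receive path for a GRE-like protocol might try an exact-key
 * lookup first and then fall back to a wildcard key match (a sketch only;
 * the variable names below are purely illustrative):
 *
 *	vport = ovs_tnl_find_port(local_ip, remote_ip, key,
 *				  TNL_T_PROTO_GRE | TNL_T_KEY_EXACT, &mutable);
 *	if (!vport)
 *		vport = ovs_tnl_find_port(local_ip, remote_ip, 0,
 *					  TNL_T_PROTO_GRE | TNL_T_KEY_MATCH,
 *					  &mutable);
 */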

/* Private flags not exposed to userspace in this form. */
#define TNL_F_IN_KEY_MATCH	(1 << 16) /* Store the key in tun_id to
					   * match in the flow table. */
#define TNL_F_OUT_KEY_ACTION	(1 << 17) /* Get the key from a SET_TUNNEL
					   * action. */

/* All public tunnel flags. */
#define TNL_F_PUBLIC (TNL_F_CSUM | TNL_F_TOS_INHERIT | TNL_F_TTL_INHERIT | \
		      TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \
		      TNL_F_HDR_CACHE | TNL_F_IPSEC)
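
/*
 * TNL_F_PUBLIC is intended as a mask for validating flags supplied from
 * userspace; for example, an options parser might do something like:
 *
 *	if (flags & ~TNL_F_PUBLIC)
 *		return -EINVAL;
 *
 * so that the private TNL_F_IN_KEY_MATCH and TNL_F_OUT_KEY_ACTION bits can
 * never be set directly from userspace.
 */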

/**
 * struct port_lookup_key - Tunnel port key, used as hash table key.
 * @in_key: Key to match on input, 0 for wildcard.
 * @saddr: IPv4 source address to match, 0 to accept any source address.
 * @daddr: IPv4 destination address of the tunnel.
 * @tunnel_type: Set of TNL_T_* flags that define lookup.
 */
struct port_lookup_key {
	__be64 in_key;
	__be32 saddr;
	__be32 daddr;
	u32    tunnel_type;
};

#define PORT_KEY_LEN	(offsetof(struct port_lookup_key, tunnel_type) + \
			 FIELD_SIZEOF(struct port_lookup_key, tunnel_type))
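
/*
 * PORT_KEY_LEN covers the structure up to and including tunnel_type, so
 * hashing or comparing only those bytes ignores any trailing padding.  A
 * hash table might, for instance, compute (a sketch only, using the kernel's
 * jhash() from <linux/jhash.h>; not necessarily what tunnel.c does):
 *
 *	struct port_lookup_key lookup = { .in_key = key, .saddr = saddr,
 *					  .daddr = daddr, .tunnel_type = type };
 *	u32 hash = jhash(&lookup, PORT_KEY_LEN, 0);
 */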

/**
 * struct tnl_mutable_config - modifiable configuration for a tunnel.
 * @key: Used as key for tunnel port.  Configured via OVS_TUNNEL_ATTR_*
 * attributes.
 * @rcu: RCU callback head for deferred destruction.
 * @seq: Sequence number for distinguishing configuration versions.
 * @tunnel_hlen: Tunnel header length.
 * @eth_addr: Source Ethernet address for packets generated by the tunnel
 * itself (e.g. ICMP "fragmentation needed" messages).
 * @out_key: Key to use on output, 0 if this tunnel has no fixed output key.
 * @flags: TNL_F_* flags.
 * @tos: IPv4 TOS value to use for tunnel, 0 if no fixed TOS.
 * @ttl: IPv4 TTL value to use for tunnel, 0 if no fixed TTL.
 */
struct tnl_mutable_config {
	struct port_lookup_key key;
	struct rcu_head rcu;

	unsigned seq;

	unsigned tunnel_hlen;

	unsigned char eth_addr[ETH_ALEN];

	/* Configured via OVS_TUNNEL_ATTR_* attributes. */
	__be64	out_key;
	u32	flags;
	u8	tos;
	u8	ttl;

	/* Multicast configuration. */
	int	mlink;
};

struct tnl_ops {
	u32 tunnel_type;	/* Put the TNL_T_PROTO_* type in here. */
	u8 ipproto;		/* The IP protocol for the tunnel. */

	/*
	 * Returns the length of the tunnel header that will be added in
	 * build_header() (i.e. excludes the IP header).  Returns a negative
	 * error code if the configuration is invalid.
	 */
	int (*hdr_len)(const struct tnl_mutable_config *);

	/*
	 * Builds the static portion of the tunnel header, which is stored in
	 * the header cache.  In general the performance of this function is
	 * not too important, since we try to call it only when building the
	 * cache, so it is preferable to shift as much work as possible here.
	 * However, in some circumstances caching is disabled and this function
	 * will be called for every packet, so try not to make it too slow.
	 */
	void (*build_header)(const struct vport *,
			     const struct tnl_mutable_config *, void *header);

	/*
	 * Updates the cached header of a packet to match the actual packet
	 * data.  Typical things that might need to be updated are length,
	 * checksum, etc.  The IP header will have already been updated and this
	 * is the final step before transmission.  Returns a linked list of
	 * completed SKBs (multiple packets may be generated in the event
	 * of fragmentation).
	 */
	struct sk_buff *(*update_header)(const struct vport *,
					 const struct tnl_mutable_config *,
					 struct dst_entry *, struct sk_buff *);
};
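
/*
 * A tunnel protocol implementation is expected to provide one of these.  A
 * GRE-like module might, for instance, define something along these lines
 * (the my_* names are purely illustrative):
 *
 *	static const struct tnl_ops my_tnl_ops = {
 *		.tunnel_type	= TNL_T_PROTO_GRE,
 *		.ipproto	= IPPROTO_GRE,
 *		.hdr_len	= my_hdr_len,
 *		.build_header	= my_build_header,
 *		.update_header	= my_update_header,
 *	};
 *
 * and pass it to ovs_tnl_create() (declared below).
 */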

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
/*
 * On these kernels we have a fast mechanism to tell if the ARP cache for a
 * particular destination has changed.
 */
#define HAVE_HH_SEQ
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
/*
 * On these kernels we have a fast mechanism to tell if the routing table
 * has changed.
 */
#define HAVE_RT_GENID
#endif
#if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID)
/* If we can't detect all system changes directly we need to use a timeout. */
#define NEED_CACHE_TIMEOUT
#endif
struct tnl_cache {
	struct rcu_head rcu;

	int len;		/* Length of data to be memcpy'd from cache. */
	int hh_len;		/* Hardware hdr length, cached from hh_cache. */

	/* Sequence number of mutable->seq from which this cache was
	 * generated. */
	unsigned mutable_seq;

#ifdef HAVE_HH_SEQ
	/*
	 * The sequence number from the seqlock protecting the hardware header
	 * cache (in the ARP cache).  Since every write increments the counter
	 * this gives us an easy way to tell if it has changed.
	 */
	unsigned hh_seq;
#endif

#ifdef NEED_CACHE_TIMEOUT
	/*
	 * If we don't have direct mechanisms to detect all important changes
	 * in the system, fall back to an expiration time.  This expiration
	 * time can be relatively short, since at high rates there will be
	 * millions of packets per second, so we'll still get plenty of benefit
	 * from the cache.  Note that if something changes we may blackhole
	 * packets until the expiration time (depending on what changed and on
	 * the kernel version, we may be able to detect the change sooner).
	 * Expiration is expressed as a time in jiffies.
	 */
	unsigned long expiration;
#endif

	/*
	 * The routing table entry that is the result of looking up the tunnel
	 * endpoints.  It also contains a sequence number (called a generation
	 * ID) that can be compared to a global sequence to tell if the routing
	 * table has changed (and therefore there is a potential that this
	 * cached route has been invalidated).
	 */
	struct rtable *rt;

	/*
	 * If the output device for tunnel traffic is an OVS internal device,
	 * this is the flow in that datapath that the tunnel traffic matches.
	 * Since all tunnel traffic will have the same headers, this allows us
	 * to cache the flow lookup.  NULL if the output device is not an OVS
	 * internal device or if there is no flow installed.
	 */
	struct sw_flow *flow;

	/* The cached header follows after padding for alignment. */
};
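
/*
 * Consumers of the cache are expected to validate it before use, roughly
 * along these lines (a sketch only; the authoritative checks live in
 * tunnel.c):
 *
 *	stale = cache->mutable_seq != mutable->seq;
 *	#ifdef NEED_CACHE_TIMEOUT
 *	stale |= time_after(jiffies, cache->expiration);
 *	#endif
 *
 * plus, where available, comparing hh_seq and the routing table generation
 * ID against their current values.
 */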

struct tnl_vport {
	struct rcu_head rcu;
	struct hlist_node hash_node;

	char name[IFNAMSIZ];
	const struct tnl_ops *tnl_ops;

	struct tnl_mutable_config __rcu *mutable;

	/*
	 * ID of the last fragment sent (for tunnel protocols that directly
	 * support fragmentation).  If the protocol relies on IP fragmentation
	 * then this is not needed.
	 */
	atomic_t frag_id;

	spinlock_t cache_lock;
	struct tnl_cache __rcu *cache;	/* Protected by RCU/cache_lock. */

#ifdef NEED_CACHE_TIMEOUT
	/*
	 * If we must rely on expiration time to invalidate the cache, this is
	 * the interval.  It is randomized within a range (defined by
	 * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused
	 * by the creation of a large number of tunnels at one time.
	 */
	unsigned long cache_exp_interval;
#endif
};
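
/*
 * @mutable and @cache are RCU-protected: readers access them under
 * rcu_read_lock(), e.g.
 *
 *	rcu_read_lock();
 *	mutable = rcu_dereference(tnl_vport->mutable);
 *	...
 *	rcu_read_unlock();
 *
 * while writers of @cache additionally hold cache_lock and typically publish
 * new versions with rcu_assign_pointer().
 */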

struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *,
			     const struct tnl_ops *);
void ovs_tnl_destroy(struct vport *);

int ovs_tnl_set_options(struct vport *, struct nlattr *);
int ovs_tnl_get_options(const struct vport *, struct sk_buff *);

int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr);
const char *ovs_tnl_get_name(const struct vport *vport);
const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);

struct vport *ovs_tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
				int tunnel_type,
				const struct tnl_mutable_config **mutable);
bool ovs_tnl_frag_needed(struct vport *vport,
			 const struct tnl_mutable_config *mutable,
			 struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
void ovs_tnl_free_linked_skbs(struct sk_buff *skb);

int ovs_tnl_init(void);
void ovs_tnl_exit(void);

static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
{
	return vport_priv(vport);
}
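
/*
 * A tunnel protocol module typically wraps these helpers in its own struct
 * vport_ops.  As a rough sketch (the my_* names are only illustrative), its
 * create callback might be:
 *
 *	static struct vport *my_create(const struct vport_parms *parms)
 *	{
 *		return ovs_tnl_create(parms, &my_vport_ops, &my_tnl_ops);
 *	}
 *
 * after which per-port tunnel state can be reached via tnl_vport_priv(vport).
 */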

#endif /* TUNNEL_H */