1  /*
2   * This program is free software; you can redistribute it and/or
3   * modify it under the terms of the GNU General Public License version 2
4   * as published by the Free Software Foundation; or, when distributed
5   * separately from the Linux kernel or incorporated into other
6   * software packages, subject to the following license:
7   *
8   * Permission is hereby granted, free of charge, to any person obtaining a copy
9   * of this source file (the "Software"), to deal in the Software without
10   * restriction, including without limitation the rights to use, copy, modify,
11   * merge, publish, distribute, sublicense, and/or sell copies of the Software,
12   * and to permit persons to whom the Software is furnished to do so, subject to
13   * the following conditions:
14   *
15   * The above copyright notice and this permission notice shall be included in
16   * all copies or substantial portions of the Software.
17   *
18   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24   * IN THE SOFTWARE.
25   */
26  
27  #ifndef __XEN_BLKIF__BACKEND__COMMON_H__
28  #define __XEN_BLKIF__BACKEND__COMMON_H__
29  
30  #include <linux/module.h>
31  #include <linux/interrupt.h>
32  #include <linux/slab.h>
33  #include <linux/blkdev.h>
34  #include <linux/vmalloc.h>
35  #include <linux/wait.h>
36  #include <linux/io.h>
37  #include <linux/rbtree.h>
38  #include <asm/setup.h>
39  #include <asm/hypervisor.h>
40  #include <xen/grant_table.h>
41  #include <xen/page.h>
42  #include <xen/xenbus.h>
43  #include <xen/interface/io/ring.h>
44  #include <xen/interface/io/blkif.h>
45  #include <xen/interface/io/protocols.h>
46  
47  extern unsigned int xen_blkif_max_ring_order;
48  extern unsigned int xenblk_max_queues;
49  /*
 * The maximum number of segments allowed in an indirect request. This
 * value is also advertised to the frontend.
52   */
53  #define MAX_INDIRECT_SEGMENTS 256
54  
55  /*
 * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
 * Number of Xen pages per segment:
58   */
59  #define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)
60  
61  #define XEN_PAGES_PER_INDIRECT_FRAME \
62  	(XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
63  #define SEGS_PER_INDIRECT_FRAME	\
64  	(XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)
65  
66  #define MAX_INDIRECT_PAGES \
67  	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
68  #define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
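
/*
 * Worked example (sketch, assuming 4K Xen pages and an 8-byte
 * struct blkif_request_segment): XEN_PAGES_PER_INDIRECT_FRAME is
 * 4096 / 8 = 512.  A 4K guest page then gives SEGS_PER_INDIRECT_FRAME = 512
 * and MAX_INDIRECT_PAGES = DIV_ROUND_UP(256, 512) = 1; a 64K guest page
 * gives XEN_PAGES_PER_SEGMENT = 16, SEGS_PER_INDIRECT_FRAME = 32 and
 * MAX_INDIRECT_PAGES = DIV_ROUND_UP(256, 32) = 8.
 */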
69  
70  /* Not a real protocol.  Used to generate ring structs which contain
71   * the elements common to all protocols only.  This way we get a
72   * compiler-checkable way to use common struct elements, so we can
73   * avoid using switch(protocol) in a number of places.  */
74  struct blkif_common_request {
75  	char dummy;
76  };
77  
78  /* i386 protocol version */
79  
80  struct blkif_x86_32_request_rw {
81  	uint8_t        nr_segments;  /* number of segments                   */
82  	blkif_vdev_t   handle;       /* only for read/write requests         */
83  	uint64_t       id;           /* private guest value, echoed in resp  */
84  	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
85  	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
86  } __attribute__((__packed__));
87  
88  struct blkif_x86_32_request_discard {
89  	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
90  	blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
91  	uint64_t       id;           /* private guest value, echoed in resp  */
92  	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
93  	uint64_t       nr_sectors;
94  } __attribute__((__packed__));
95  
96  struct blkif_x86_32_request_other {
97  	uint8_t        _pad1;
98  	blkif_vdev_t   _pad2;
99  	uint64_t       id;           /* private guest value, echoed in resp  */
100  } __attribute__((__packed__));
101  
102  struct blkif_x86_32_request_indirect {
103  	uint8_t        indirect_op;
104  	uint16_t       nr_segments;
105  	uint64_t       id;
106  	blkif_sector_t sector_number;
107  	blkif_vdev_t   handle;
108  	uint16_t       _pad1;
109  	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
110  	/*
	 * The maximum number of indirect segments (and pages) that will
	 * be used is determined by MAX_INDIRECT_SEGMENTS; this value
	 * is also exported to the guest (via the xenstore
	 * feature-max-indirect-segments entry), so the frontend knows how
	 * many indirect segments the backend supports.
116  	 */
117  	uint64_t       _pad2;        /* make it 64 byte aligned */
118  } __attribute__((__packed__));
119  
120  struct blkif_x86_32_request {
121  	uint8_t        operation;    /* BLKIF_OP_???                         */
122  	union {
123  		struct blkif_x86_32_request_rw rw;
124  		struct blkif_x86_32_request_discard discard;
125  		struct blkif_x86_32_request_other other;
126  		struct blkif_x86_32_request_indirect indirect;
127  	} u;
128  } __attribute__((__packed__));
129  
130  /* x86_64 protocol version */
131  
132  struct blkif_x86_64_request_rw {
133  	uint8_t        nr_segments;  /* number of segments                   */
134  	blkif_vdev_t   handle;       /* only for read/write requests         */
135  	uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
136  	uint64_t       id;
137  	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
138  	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
139  } __attribute__((__packed__));
140  
141  struct blkif_x86_64_request_discard {
142  	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
143  	blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
	uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
145  	uint64_t       id;
146  	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
147  	uint64_t       nr_sectors;
148  } __attribute__((__packed__));
149  
150  struct blkif_x86_64_request_other {
151  	uint8_t        _pad1;
152  	blkif_vdev_t   _pad2;
	uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
154  	uint64_t       id;           /* private guest value, echoed in resp  */
155  } __attribute__((__packed__));
156  
157  struct blkif_x86_64_request_indirect {
158  	uint8_t        indirect_op;
159  	uint16_t       nr_segments;
160  	uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
161  	uint64_t       id;
162  	blkif_sector_t sector_number;
163  	blkif_vdev_t   handle;
164  	uint16_t       _pad2;
165  	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
166  	/*
	 * The maximum number of indirect segments (and pages) that will
	 * be used is determined by MAX_INDIRECT_SEGMENTS; this value
	 * is also exported to the guest (via the xenstore
	 * feature-max-indirect-segments entry), so the frontend knows how
	 * many indirect segments the backend supports.
172  	 */
173  	uint32_t       _pad3;        /* make it 64 byte aligned */
174  } __attribute__((__packed__));
175  
176  struct blkif_x86_64_request {
177  	uint8_t        operation;    /* BLKIF_OP_???                         */
178  	union {
179  		struct blkif_x86_64_request_rw rw;
180  		struct blkif_x86_64_request_discard discard;
181  		struct blkif_x86_64_request_other other;
182  		struct blkif_x86_64_request_indirect indirect;
183  	} u;
184  } __attribute__((__packed__));
185  
186  DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
187  		  struct blkif_response);
188  DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
189  		  struct blkif_response __packed);
190  DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
191  		  struct blkif_response);
192  
193  union blkif_back_rings {
194  	struct blkif_back_ring        native;
195  	struct blkif_common_back_ring common;
196  	struct blkif_x86_32_back_ring x86_32;
197  	struct blkif_x86_64_back_ring x86_64;
198  };
199  
200  enum blkif_protocol {
201  	BLKIF_PROTOCOL_NATIVE = 1,
202  	BLKIF_PROTOCOL_X86_32 = 2,
203  	BLKIF_PROTOCOL_X86_64 = 3,
204  };
205  
206  /*
207   * Default protocol if the frontend doesn't specify one.
208   */
209  #ifdef CONFIG_X86
210  #  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
211  #else
212  #  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
213  #endif
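
/*
 * Minimal sketch (not taken from the in-tree sources): map the ABI string a
 * frontend writes to its xenstore "protocol" node onto the enum above,
 * falling back to BLKIF_PROTOCOL_DEFAULT when the node is absent or not
 * recognised.  The function name is illustrative and assumes strcmp() from
 * <linux/string.h> is available.
 */
static inline enum blkif_protocol
xen_blkif_parse_protocol(const char *protocol)
{
	if (!protocol)
		return BLKIF_PROTOCOL_DEFAULT;
	if (!strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		return BLKIF_PROTOCOL_NATIVE;
	if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		return BLKIF_PROTOCOL_X86_32;
	if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		return BLKIF_PROTOCOL_X86_64;
	return BLKIF_PROTOCOL_DEFAULT;
}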
214  
215  struct xen_vbd {
216  	/* What the domain refers to this vbd as. */
217  	blkif_vdev_t		handle;
218  	/* Non-zero -> read-only */
219  	unsigned char		readonly;
220  	/* VDISK_xxx */
221  	unsigned char		type;
222  	/* phys device that this vbd maps to. */
223  	u32			pdevice;
224  	struct file		*bdev_file;
225  	/* Cached size parameter. */
226  	sector_t		size;
227  	unsigned int		flush_support:1;
228  	unsigned int		discard_secure:1;
229  	/* Connect-time cached feature_persistent parameter value */
230  	unsigned int		feature_gnt_persistent_parm:1;
231  	/* Persistent grants feature negotiation result */
232  	unsigned int		feature_gnt_persistent:1;
233  	unsigned int		overflow_max_grants:1;
234  };
235  
236  struct backend_info;
237  
/* Number of requests that we can fit in a single ring page */
239  #define XEN_BLKIF_REQS_PER_PAGE		32
240  
241  struct persistent_gnt {
242  	struct page *page;
243  	grant_ref_t gnt;
244  	grant_handle_t handle;
245  	unsigned long last_used;
246  	bool active;
247  	struct rb_node node;
248  	struct list_head remove_node;
249  };
250  
251  /* Per-ring information. */
252  struct xen_blkif_ring {
253  	/* Physical parameters of the comms window. */
254  	unsigned int		irq;
255  	union blkif_back_rings	blk_rings;
256  	void			*blk_ring;
257  	/* Private fields. */
258  	spinlock_t		blk_ring_lock;
259  
260  	wait_queue_head_t	wq;
261  	atomic_t		inflight;
262  	bool			active;
263  	/* One thread per blkif ring. */
264  	struct task_struct	*xenblkd;
265  	unsigned int		waiting_reqs;
266  
	/* List of all available 'pending_req' entries. */
268  	struct list_head	pending_free;
269  	/* And its spinlock. */
270  	spinlock_t		pending_free_lock;
271  	wait_queue_head_t	pending_free_wq;
272  
273  	/* Tree to store persistent grants. */
274  	struct rb_root		persistent_gnts;
275  	unsigned int		persistent_gnt_c;
276  	atomic_t		persistent_gnt_in_use;
277  	unsigned long           next_lru;
278  
279  	/* Statistics. */
280  	unsigned long		st_print;
281  	unsigned long long	st_rd_req;
282  	unsigned long long	st_wr_req;
283  	unsigned long long	st_oo_req;
284  	unsigned long long	st_f_req;
285  	unsigned long long	st_ds_req;
286  	unsigned long long	st_rd_sect;
287  	unsigned long long	st_wr_sect;
288  
	/* Used by the kworker that offloads work from the persistent purge. */
290  	struct list_head	persistent_purge_list;
291  	struct work_struct	persistent_purge_work;
292  
293  	/* Buffer of free pages to map grant refs. */
294  	struct gnttab_page_cache free_pages;
295  
296  	struct work_struct	free_work;
297  	/* Thread shutdown wait queue. */
298  	wait_queue_head_t	shutdown_wq;
299  	struct xen_blkif	*blkif;
300  };
301  
302  struct xen_blkif {
303  	/* Unique identifier for this interface. */
304  	domid_t			domid;
305  	unsigned int		handle;
306  	/* Comms information. */
307  	enum blkif_protocol	blk_protocol;
308  	/* The VBD attached to this interface. */
309  	struct xen_vbd		vbd;
310  	/* Back pointer to the backend_info. */
311  	struct backend_info	*be;
312  	atomic_t		refcnt;
313  	/* for barrier (drain) requests */
314  	struct completion	drain_complete;
315  	atomic_t		drain;
316  
317  	struct work_struct	free_work;
318  	unsigned int		nr_ring_pages;
319  	bool			multi_ref;
320  	/* All rings for this device. */
321  	struct xen_blkif_ring	*rings;
322  	unsigned int		nr_rings;
323  	unsigned long		buffer_squeeze_end;
324  };
325  
326  struct seg_buf {
327  	unsigned long offset;
328  	unsigned int nsec;
329  };
330  
331  struct grant_page {
332  	struct page		*page;
333  	struct persistent_gnt	*persistent_gnt;
334  	grant_handle_t		handle;
335  	grant_ref_t		gref;
336  };
337  
338  /*
339   * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
342   * response queued for it, with the saved 'id' passed back.
343   */
344  struct pending_req {
345  	struct xen_blkif_ring   *ring;
346  	u64			id;
347  	int			nr_segs;
348  	atomic_t		pendcnt;
349  	unsigned short		operation;
350  	int			status;
351  	struct list_head	free_list;
352  	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
353  	/* Indirect descriptors */
354  	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
355  	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
356  	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
357  	struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
358  	struct page                   *unmap_pages[MAX_INDIRECT_SEGMENTS];
359  	struct gntab_unmap_queue_data gnttab_unmap_data;
360  };
361  
362  
363  #define vbd_sz(_v)	bdev_nr_sectors(file_bdev((_v)->bdev_file))
364  
365  #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
366  #define xen_blkif_put(_b)				\
367  	do {						\
368  		if (atomic_dec_and_test(&(_b)->refcnt))	\
369  			schedule_work(&(_b)->free_work);\
370  	} while (0)
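
/*
 * Typical usage (sketch): take a reference with xen_blkif_get() before
 * handing the blkif to asynchronous work and drop it with xen_blkif_put()
 * when done; the final put schedules ->free_work to tear the interface down.
 */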
371  
372  struct phys_req {
373  	unsigned short		dev;
374  	blkif_sector_t		nr_sects;
375  	struct block_device	*bdev;
376  	blkif_sector_t		sector_number;
377  };
378  
379  int xen_blkif_interface_init(void);
380  void xen_blkif_interface_fini(void);
381  
382  int xen_blkif_xenbus_init(void);
383  void xen_blkif_xenbus_fini(void);
384  
385  irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
386  int xen_blkif_schedule(void *arg);
387  void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
388  
389  int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
390  			      struct backend_info *be, int state);
391  
392  int xen_blkbk_barrier(struct xenbus_transaction xbt,
393  		      struct backend_info *be, int state);
394  struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
395  void xen_blkbk_unmap_purged_grants(struct work_struct *work);
396  
397  #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
398