/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ring buffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

static int xenbus_irq;
static struct task_struct *xenbus_task;

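/* Event channel upcall: just wake whoever is waiting on the xenbus ring. */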
static irqreturn_t wake_waiting(int irq, void *unused)
{
	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}

static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	return ((prod - cons) <= XENSTORE_RING_SIZE);
}

static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return buf + MASK_XENSTORE_IDX(prod);
}

static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return buf + MASK_XENSTORE_IDX(cons);
}

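/*
 * Worked example of the free-running index arithmetic (illustrative
 * numbers only): the shared interface defines XENSTORE_RING_SIZE as 1024
 * and MASK_XENSTORE_IDX(idx) as (idx & (XENSTORE_RING_SIZE - 1)).  With
 * cons == 0x3fe and prod == 0x402 there are prod - cons == 4 unread
 * bytes; get_input_chunk() returns buf + 0x3fe with *len == 2, clipping
 * the chunk at the end of the buffer, and the caller loops once more to
 * pick up the 2 bytes that wrapped around to buf[0].
 */

/* The request ring is not full and at least one write request is queued. */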
static int xb_data_to_write(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
		!list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or a negative errno on failure.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
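
/*
 * On the notify condition in xb_write() above: prod is the producer index
 * read before this chunk was written, so prod <= req_cons means xenstored
 * had already consumed everything we had published and may be sleeping;
 * only then is the event channel kicked.  (Illustrative case: old
 * prod == 0x500 and req_cons == 0x500 triggers a wakeup, while
 * req_cons == 0x4f0 means the daemon is still busy and will notice the
 * new producer index on its own.)
 */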

static int xb_data_to_read(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->rsp_cons != intf->rsp_prod);
}

static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out. */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

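/*
 * The notify in xb_read() above fires only when the response ring was
 * completely full before the chunk just consumed (old cons plus
 * XENSTORE_RING_SIZE reaching rsp_prod), since that is the only case in
 * which xenstored may be blocked waiting for free space.
 */

/*
 * Read and process one message from xenstored.  Partial reads keep their
 * place in a static state struct, so the function simply returns 0 and
 * resumes where it left off when more data arrives; while a message is
 * in flight, xs_response_mutex stays held to fence off save/restore.
 */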
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;
		char *body;
		union {
			void *alloc;
			struct xs_watch_event *watch;
		};
		bool in_msg;
		bool in_hdr;
		unsigned int read;
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else {
			kfree(req);
		}
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

 out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}

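/*
 * Send queued write requests to xenstored.  Like process_msg(), this
 * keeps its position (current request, current kvec, bytes written) in
 * a static state struct so a partially written request can be resumed
 * when the ring has room again.
 */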
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;
		int idx;
		unsigned int written;
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

 out:
	mutex_unlock(&xb_write_mutex);

	return 0;

 out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted) {
		kfree(state.req);
	} else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}

static int xb_thread_work(void)
{
	return xb_data_to_read() || xb_data_to_write();
}

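/*
 * The single xenbus kthread: sleeps on xb_waitq until there is ring
 * traffic in either direction, then alternately drains responses and
 * pushes queued writes.
 */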
static int xenbus_thread(void *unused)
{
	int err;

	while (!kthread_should_stop()) {
		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
			continue;

		err = process_msg();
		if (err == -ENOMEM)
			schedule();
		else if (err)
			pr_warn_ratelimited("error %d while reading message\n",
					    err);

		err = process_writes();
		if (err)
			pr_warn_ratelimited("error %d while writing message\n",
					    err);
	}

	xenbus_task = NULL;
	return 0;
}

/**
 * xb_init_comms - Set up interrupt handler for the store event channel.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}

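/*
 * Tear down the event channel interrupt binding set up by
 * xb_init_comms(); the xenbus thread itself is left running.
 */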
void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}