1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *	Local APIC handling, local APIC timers
4   *
5   *	(c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6   *
7   *	Fixes
8   *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
9   *					thanks to Eric Gilmore
10   *					and Rolf G. Tews
11   *					for testing these extensively.
12   *	Maciej W. Rozycki	:	Various updates and fixes.
13   *	Mikael Pettersson	:	Power Management for UP-APIC.
14   *	Pavel Machek and
15   *	Mikael Pettersson	:	PM converted to driver model.
16   */
17  
18  #include <linux/perf_event.h>
19  #include <linux/kernel_stat.h>
20  #include <linux/mc146818rtc.h>
21  #include <linux/acpi_pmtmr.h>
22  #include <linux/bitmap.h>
23  #include <linux/clockchips.h>
24  #include <linux/interrupt.h>
25  #include <linux/memblock.h>
26  #include <linux/ftrace.h>
27  #include <linux/ioport.h>
28  #include <linux/export.h>
29  #include <linux/syscore_ops.h>
30  #include <linux/delay.h>
31  #include <linux/timex.h>
32  #include <linux/i8253.h>
33  #include <linux/dmar.h>
34  #include <linux/init.h>
35  #include <linux/cpu.h>
36  #include <linux/dmi.h>
37  #include <linux/smp.h>
38  #include <linux/mm.h>
39  
40  #include <xen/xen.h>
41  
42  #include <asm/trace/irq_vectors.h>
43  #include <asm/irq_remapping.h>
44  #include <asm/pc-conf-reg.h>
45  #include <asm/perf_event.h>
46  #include <asm/x86_init.h>
47  #include <linux/atomic.h>
48  #include <asm/barrier.h>
49  #include <asm/mpspec.h>
50  #include <asm/i8259.h>
51  #include <asm/proto.h>
52  #include <asm/traps.h>
53  #include <asm/apic.h>
54  #include <asm/acpi.h>
55  #include <asm/io_apic.h>
56  #include <asm/desc.h>
57  #include <asm/hpet.h>
58  #include <asm/mtrr.h>
59  #include <asm/time.h>
60  #include <asm/smp.h>
61  #include <asm/mce.h>
62  #include <asm/tsc.h>
63  #include <asm/hypervisor.h>
64  #include <asm/cpu_device_id.h>
65  #include <asm/intel-family.h>
66  #include <asm/irq_regs.h>
67  #include <asm/cpu.h>
68  
69  #include "local.h"
70  
/*
 * Physical APIC ID of the processor that is doing the boot up.
 * Holds BAD_APICID until it is assigned.
 */
u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);

/* NOTE(review): presumably the local APIC version of the boot CPU - confirm at the writer */
u8 boot_cpu_apic_version __ro_after_init;

/*
 * This variable controls which CPUs receive external NMIs.  By default,
 * external NMIs are delivered only to the BSP.
 */
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

/*
 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
 */
static bool virt_ext_dest_id __ro_after_init;

/*
 * For parallel bootup. A non-zero value is also one of the two
 * conditions apic_accessible() accepts.
 */
unsigned long apic_mmio_base __ro_after_init;
90  
apic_accessible(void)91  static inline bool apic_accessible(void)
92  {
93  	return x2apic_mode || apic_mmio_base;
94  }
95  
96  #ifdef CONFIG_X86_32
/*
 * Local APIC was disabled by the BIOS and enabled by the kernel.
 * Only built for CONFIG_X86_32.
 */
static int enabled_via_apicbase __ro_after_init;
99  
100  /*
101   * Handle interrupt mode configuration register (IMCR).
102   * This register controls whether the interrupt signals
103   * that reach the BSP come from the master PIC or from the
104   * local APIC. Before entering Symmetric I/O Mode, either
105   * the BIOS or the operating system must switch out of
106   * PIC Mode by changing the IMCR.
107   */
/* Switch the IMCR out of PIC mode by writing 0x01 to PC_CONF_MPS_IMCR. */
static inline void imcr_pic_to_apic(void)
{
	/* NMI and 8259 INTR go through APIC */
	pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
}
113  
/* Switch the IMCR back to PIC mode by writing 0x00 to PC_CONF_MPS_IMCR. */
static inline void imcr_apic_to_pic(void)
{
	/* NMI and 8259 INTR go directly to BSP */
	pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
}
119  #endif
120  
/*
 * Knob to control our willingness to enable the local APIC.
 *
 * +1=force-enable
 *
 * Set by parse_lapic() when "lapic" is given without a value on a
 * CONFIG_X86_32 kernel.
 */
static int force_enable_local_apic __initdata;
127  
128  /*
129   * APIC command line parameters
130   */
parse_lapic(char * arg)131  static int __init parse_lapic(char *arg)
132  {
133  	if (IS_ENABLED(CONFIG_X86_32) && !arg)
134  		force_enable_local_apic = 1;
135  	else if (arg && !strncmp(arg, "notscdeadline", 13))
136  		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
137  	return 0;
138  }
139  early_param("lapic", parse_lapic);
140  
141  #ifdef CONFIG_X86_64
/* Set to 1 by the "apicpmtimer" command line option */
static int apic_calibrate_pmtmr __initdata;

/*
 * Handler for the "apicpmtimer" command line option.
 *
 * Sets apic_calibrate_pmtmr and calls notsc_setup(NULL); the option
 * string @s is not inspected. Returns 1.
 */
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
150  #endif
151  
/* NOTE(review): presumably the physical address of the local APIC - confirm at the users */
static unsigned long mp_lapic_addr __ro_after_init;
/* When set, apic_needs_pit() reports that the PIT is required */
bool apic_is_disabled __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);

/*
 * Debug level, exported for io_apic.c
 */
int apic_verbosity __ro_after_init;

/* NOTE(review): looks like the "PIC mode" flag from the MP configuration - confirm */
int pic_mode __ro_after_init;

/* Have we found an MP table */
int smp_found_config __ro_after_init;

/* Resource descriptor for the local APIC: a busy memory region */
static struct resource lapic_resource = {
	.name = "Local APIC",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

/*
 * Calibrated APIC timer period; 0 means "not calibrated yet"
 * (see lapic_init_clockevent() and apic_needs_pit()).
 */
unsigned int lapic_timer_period = 0;

/* Forward declaration; the definition is further down in this file */
static void apic_pm_activate(void);
178  
179  /*
180   * Get the LAPIC version
181   */
lapic_get_version(void)182  static inline int lapic_get_version(void)
183  {
184  	return GET_APIC_VERSION(apic_read(APIC_LVR));
185  }
186  
/*
 * Check, if the APIC is integrated or a separate chip.
 *
 * Returns the value of APIC_INTEGRATED() for the LAPIC version; callers
 * in this file treat zero as "separate chip" (82489DX).
 */
static inline int lapic_is_integrated(void)
{
	return APIC_INTEGRATED(lapic_get_version());
}
194  
195  /*
196   * Check, whether this is a modern or a first generation APIC
197   */
modern_apic(void)198  static int modern_apic(void)
199  {
200  	/* AMD systems use old APIC versions, so check the CPU */
201  	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
202  	    boot_cpu_data.x86 >= 0xf)
203  		return 1;
204  
205  	/* Hygon systems use modern APIC */
206  	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
207  		return 1;
208  
209  	return lapic_get_version() >= 0x14;
210  }
211  
/*
 * Install the no-op APIC driver (apic_noop).
 *
 * Right after this call the APIC is NOOP driven, so apic->write/read
 * do not do anything.
 */
static void __init apic_disable(void)
{
	apic_install_driver(&apic_noop);
}
220  
/*
 * Write the interrupt command register pair.
 *
 * The destination field derived from @id is written to APIC_ICR2 first,
 * then @low is written to APIC_ICR. Local interrupts are disabled across
 * the two writes and restored afterwards.
 */
void native_apic_icr_write(u32 low, u32 id)
{
	const u32 dest = SET_XAPIC_DEST_FIELD(id);
	unsigned long irq_state;

	local_irq_save(irq_state);
	apic_write(APIC_ICR2, dest);
	apic_write(APIC_ICR, low);
	local_irq_restore(irq_state);
}
230  
native_apic_icr_read(void)231  u64 native_apic_icr_read(void)
232  {
233  	u32 icr1, icr2;
234  
235  	icr2 = apic_read(APIC_ICR2);
236  	icr1 = apic_read(APIC_ICR);
237  
238  	return icr1 | ((u64)icr2 << 32);
239  }
240  
241  /**
242   * lapic_get_maxlvt - get the maximum number of local vector table entries
243   */
lapic_get_maxlvt(void)244  int lapic_get_maxlvt(void)
245  {
246  	/*
247  	 * - we always have APIC integrated on 64bit mode
248  	 * - 82489DXs do not report # of LVT entries
249  	 */
250  	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
251  }
252  
253  /*
254   * Local APIC timer
255   */
256  
/*
 * Clock divisor.
 * APIC_DIVISOR scales the APIC timer count (see __setup_APIC_LVTT() and
 * lapic_init_clockevent()); TSC_DIVISOR scales TSC deadline deltas (see
 * lapic_next_deadline()).
 */
#define APIC_DIVISOR 16
#define TSC_DIVISOR  8

/* i82489DX specific: base divider value OR-ed into the LVTT on non-integrated APICs */
#define		I82489DX_BASE_DIVIDER		(((0x2) << 18))
263  
264  /*
265   * This function sets up the local APIC timer, with a timeout of
266   * 'clocks' APIC bus clock. During calibration we actually call
267   * this function twice on the boot CPU, once with a bogus timeout
268   * value, second time for real. The other (noncalibrating) CPUs
269   * call this function only once, with the real, calibrated value.
270   *
271   * We do reads before writes even if unnecessary, to get around the
272   * P5 APIC double write bug.
273   */
__setup_APIC_LVTT(unsigned int clocks,int oneshot,int irqen)274  static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
275  {
276  	unsigned int lvtt_value, tmp_value;
277  
278  	lvtt_value = LOCAL_TIMER_VECTOR;
279  	if (!oneshot)
280  		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
281  	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
282  		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
283  
284  	/*
285  	 * The i82489DX APIC uses bit 18 and 19 for the base divider.  This
286  	 * overlaps with bit 18 on integrated APICs, but is not documented
287  	 * in the SDM. No problem though. i82489DX equipped systems do not
288  	 * have TSC deadline timer.
289  	 */
290  	if (!lapic_is_integrated())
291  		lvtt_value |= I82489DX_BASE_DIVIDER;
292  
293  	if (!irqen)
294  		lvtt_value |= APIC_LVT_MASKED;
295  
296  	apic_write(APIC_LVTT, lvtt_value);
297  
298  	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
299  		/*
300  		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
301  		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
302  		 * According to Intel, MFENCE can do the serialization here.
303  		 */
304  		asm volatile("mfence" : : : "memory");
305  		return;
306  	}
307  
308  	/*
309  	 * Divide PICLK by 16
310  	 */
311  	tmp_value = apic_read(APIC_TDCR);
312  	apic_write(APIC_TDCR,
313  		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
314  		APIC_TDR_DIV_16);
315  
316  	if (!oneshot)
317  		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
318  }
319  
320  /*
321   * Setup extended LVT, AMD specific
322   *
323   * Software should use the LVT offsets the BIOS provides.  The offsets
324   * are determined by the subsystems using it like those for MCE
325   * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
326   * are supported. Beginning with family 10h at least 4 offsets are
327   * available.
328   *
329   * Since the offsets must be consistent for all cores, we keep track
330   * of the LVT offsets in software and reserve the offset for the same
331   * vector also to be used on other cores. An offset is freed by
332   * setting the entry to APIC_EILVT_MASKED.
333   *
334   * If the BIOS is right, there should be no conflicts. Otherwise a
335   * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
338   */
339  
/* Software reservation of each extended LVT offset; 0 means unassigned */
static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
341  
eilvt_entry_is_changeable(unsigned int old,unsigned int new)342  static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
343  {
344  	return (old & APIC_EILVT_MASKED)
345  		|| (new == APIC_EILVT_MASKED)
346  		|| ((new & ~APIC_EILVT_MASKED) == old);
347  }
348  
reserve_eilvt_offset(int offset,unsigned int new)349  static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
350  {
351  	unsigned int rsvd, vector;
352  
353  	if (offset >= APIC_EILVT_NR_MAX)
354  		return ~0;
355  
356  	rsvd = atomic_read(&eilvt_offsets[offset]);
357  	do {
358  		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
359  		if (vector && !eilvt_entry_is_changeable(vector, new))
360  			/* may not change if vectors are different */
361  			return rsvd;
362  	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
363  
364  	rsvd = new & ~APIC_EILVT_MASKED;
365  	if (rsvd && rsvd != vector)
366  		pr_info("LVT offset %d assigned for vector 0x%02x\n",
367  			offset, rsvd);
368  
369  	return new;
370  }
371  
372  /*
373   * If mask=1, the LVT entry does not generate interrupts while mask=0
374   * enables the vector. See also the BKDGs. Must be called with
375   * preemption disabled.
376   */
377  
setup_APIC_eilvt(u8 offset,u8 vector,u8 msg_type,u8 mask)378  int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
379  {
380  	unsigned long reg = APIC_EILVTn(offset);
381  	unsigned int new, old, reserved;
382  
383  	new = (mask << 16) | (msg_type << 8) | vector;
384  	old = apic_read(reg);
385  	reserved = reserve_eilvt_offset(offset, new);
386  
387  	if (reserved != new) {
388  		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
389  		       "vector 0x%x, but the register is already in use for "
390  		       "vector 0x%x on another cpu\n",
391  		       smp_processor_id(), reg, offset, new, reserved);
392  		return -EINVAL;
393  	}
394  
395  	if (!eilvt_entry_is_changeable(old, new)) {
396  		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
397  		       "vector 0x%x, but the register is already in use for "
398  		       "vector 0x%x on this cpu\n",
399  		       smp_processor_id(), reg, offset, new, old);
400  		return -EBUSY;
401  	}
402  
403  	apic_write(reg, new);
404  
405  	return 0;
406  }
407  EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
408  
/*
 * Program the next event, relative to now.
 *
 * Writes @delta to the APIC_TMICT register; @evt is not used.
 * Always returns 0.
 */
static int lapic_next_event(unsigned long delta,
			    struct clock_event_device *evt)
{
	apic_write(APIC_TMICT, delta);
	return 0;
}
418  
lapic_next_deadline(unsigned long delta,struct clock_event_device * evt)419  static int lapic_next_deadline(unsigned long delta,
420  			       struct clock_event_device *evt)
421  {
422  	u64 tsc;
423  
424  	/* This MSR is special and need a special fence: */
425  	weak_wrmsr_fence();
426  
427  	tsc = rdtsc();
428  	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
429  	return 0;
430  }
431  
lapic_timer_shutdown(struct clock_event_device * evt)432  static int lapic_timer_shutdown(struct clock_event_device *evt)
433  {
434  	unsigned int v;
435  
436  	/* Lapic used as dummy for broadcast ? */
437  	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
438  		return 0;
439  
440  	v = apic_read(APIC_LVTT);
441  	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
442  	apic_write(APIC_LVTT, v);
443  
444  	/*
445  	 * Setting APIC_LVT_MASKED (above) should be enough to tell
446  	 * the hardware that this timer will never fire. But AMD
447  	 * erratum 411 and some Intel CPU behavior circa 2024 say
448  	 * otherwise.  Time for belt and suspenders programming: mask
449  	 * the timer _and_ zero the counter registers:
450  	 */
451  	if (v & APIC_LVT_TIMER_TSCDEADLINE)
452  		wrmsrl(MSR_IA32_TSC_DEADLINE, 0);
453  	else
454  		apic_write(APIC_TMICT, 0);
455  
456  	return 0;
457  }
458  
459  static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device * evt,bool oneshot)460  lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
461  {
462  	/* Lapic used as dummy for broadcast ? */
463  	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
464  		return 0;
465  
466  	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
467  	return 0;
468  }
469  
/* Clock event callback: switch the local APIC timer to periodic mode */
static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, false);
}
474  
/* Clock event callback: switch the local APIC timer to oneshot mode */
static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, true);
}
479  
/*
 * Local APIC timer broadcast function.
 *
 * Sends LOCAL_TIMER_VECTOR as an IPI to the CPUs in @mask. Compiles to
 * an empty function without CONFIG_SMP.
 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	__apic_send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
489  
490  
/*
 * The local apic timer can be used for any function which is CPU local.
 *
 * This is the template device: setup_APIC_timer() copies it into the
 * per-CPU lapic_events instance and adjusts the copy. It starts out with
 * CLOCK_EVT_FEAT_DUMMY set; the flag is cleared once calibration
 * succeeds.
 */
static struct clock_event_device lapic_clockevent = {
	.name				= "lapic",
	.features			= CLOCK_EVT_FEAT_PERIODIC |
					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
					  | CLOCK_EVT_FEAT_DUMMY,
	.shift				= 32,
	.set_state_shutdown		= lapic_timer_shutdown,
	.set_state_periodic		= lapic_timer_set_periodic,
	.set_state_oneshot		= lapic_timer_set_oneshot,
	.set_state_oneshot_stopped	= lapic_timer_shutdown,
	.set_next_event			= lapic_next_event,
	.broadcast			= lapic_timer_broadcast,
	.rating				= 100,
	.irq				= -1,
};
/* Per-CPU clock event device, initialized from lapic_clockevent */
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
510  
/*
 * CPU models checked by apic_validate_deadline_timer(). For a matching
 * CPU, the last macro argument (driver_data) is the microcode revision
 * that boot_cpu_data.microcode is compared against; an older revision
 * gets the TSC deadline timer disabled.
 */
static const struct x86_cpu_id deadline_match[] __initconst = {
	X86_MATCH_VFM_STEPPINGS(INTEL_HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
	X86_MATCH_VFM_STEPPINGS(INTEL_HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */

	X86_MATCH_VFM(INTEL_BROADWELL_X,	0x0b000020),

	X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
	X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
	X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
	X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),

	X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
	X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
	X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),

	X86_MATCH_VFM(INTEL_HASWELL,		0x22),
	X86_MATCH_VFM(INTEL_HASWELL_L,		0x20),
	X86_MATCH_VFM(INTEL_HASWELL_G,		0x17),

	X86_MATCH_VFM(INTEL_BROADWELL,		0x25),
	X86_MATCH_VFM(INTEL_BROADWELL_G,	0x17),

	X86_MATCH_VFM(INTEL_SKYLAKE_L,		0xb2),
	X86_MATCH_VFM(INTEL_SKYLAKE,		0xb2),

	X86_MATCH_VFM(INTEL_KABYLAKE_L,		0x52),
	X86_MATCH_VFM(INTEL_KABYLAKE,		0x52),

	/* Terminating empty entry */
	{},
};
541  
apic_validate_deadline_timer(void)542  static __init bool apic_validate_deadline_timer(void)
543  {
544  	const struct x86_cpu_id *m;
545  	u32 rev;
546  
547  	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
548  		return false;
549  	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
550  		return true;
551  
552  	m = x86_match_cpu(deadline_match);
553  	if (!m)
554  		return true;
555  
556  	rev = (u32)m->driver_data;
557  
558  	if (boot_cpu_data.microcode >= rev)
559  		return true;
560  
561  	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
562  	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
563  	       "please update microcode to version: 0x%x (or later)\n", rev);
564  	return false;
565  }
566  
567  /*
568   * Setup the local APIC timer for this CPU. Copy the initialized values
569   * of the boot CPU and register the clock event in the framework.
570   */
setup_APIC_timer(void)571  static void setup_APIC_timer(void)
572  {
573  	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
574  
575  	if (this_cpu_has(X86_FEATURE_ARAT)) {
576  		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
577  		/* Make LAPIC timer preferable over percpu HPET */
578  		lapic_clockevent.rating = 150;
579  	}
580  
581  	memcpy(levt, &lapic_clockevent, sizeof(*levt));
582  	levt->cpumask = cpumask_of(smp_processor_id());
583  
584  	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
585  		levt->name = "lapic-deadline";
586  		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
587  				    CLOCK_EVT_FEAT_DUMMY);
588  		levt->set_next_event = lapic_next_deadline;
589  		clockevents_config_and_register(levt,
590  						tsc_khz * (1000 / TSC_DIVISOR),
591  						0xF, ~0UL);
592  	} else
593  		clockevents_register_device(levt);
594  }
595  
596  /*
597   * Install the updated TSC frequency from recalibration at the TSC
598   * deadline clockevent devices.
599   */
__lapic_update_tsc_freq(void * info)600  static void __lapic_update_tsc_freq(void *info)
601  {
602  	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
603  
604  	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
605  		return;
606  
607  	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
608  }
609  
/*
 * Propagate a recalibrated TSC frequency to the TSC deadline clockevent
 * devices by running __lapic_update_tsc_freq() on each CPU.
 */
void lapic_update_tsc_freq(void)
{
	/*
	 * The clockevent device's ->mult and ->shift can both be
	 * changed. In order to avoid races, schedule the frequency
	 * update code on each CPU.
	 */
	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}
619  
620  /*
 * In this function we calibrate APIC bus clocks to the external timer.
622   *
623   * We want to do the calibration only once since we want to have local timer
624   * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
625   * frequency.
626   *
627   * This was previously done by reading the PIT/HPET and waiting for a wrap
628   * around to find out, that a tick has elapsed. I have a box, where the PIT
629   * readout is broken, so it never gets out of the wait loop again. This was
630   * also reported by others.
631   *
632   * Monitoring the jiffies value is inaccurate and the clockevents
633   * infrastructure allows us to do a simple substitution of the interrupt
634   * handler.
635   *
636   * The calibration routine also uses the pm_timer when possible, as the PIT
637   * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
638   * back to normal later in the boot process).
639   */
640  
/* Number of ticks between the two calibration samples (HZ/10 ticks) */
#define LAPIC_CAL_LOOPS		(HZ/10)

/*
 * Calibration state filled in by lapic_cal_handler(). The "1" values are
 * sampled when the loop counter is 0, the "2" values when it reaches
 * LAPIC_CAL_LOOPS.
 */
/* Loop counter, incremented on every lapic_cal_handler() call */
static __initdata int lapic_cal_loops = -1;
/* APIC timer current count (APIC_TMCCT) samples */
static __initdata long lapic_cal_t1, lapic_cal_t2;
/* TSC samples; left at 0 when the CPU has no TSC */
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
/* ACPI PM timer samples */
static __initdata u32 lapic_cal_pm1, lapic_cal_pm2;
/* jiffies samples */
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
648  
649  /*
650   * Temporary interrupt handler and polled calibration function.
651   */
lapic_cal_handler(struct clock_event_device * dev)652  static void __init lapic_cal_handler(struct clock_event_device *dev)
653  {
654  	unsigned long long tsc = 0;
655  	long tapic = apic_read(APIC_TMCCT);
656  	u32 pm = acpi_pm_read_early();
657  
658  	if (boot_cpu_has(X86_FEATURE_TSC))
659  		tsc = rdtsc();
660  
661  	switch (lapic_cal_loops++) {
662  	case 0:
663  		lapic_cal_t1 = tapic;
664  		lapic_cal_tsc1 = tsc;
665  		lapic_cal_pm1 = pm;
666  		lapic_cal_j1 = jiffies;
667  		break;
668  
669  	case LAPIC_CAL_LOOPS:
670  		lapic_cal_t2 = tapic;
671  		lapic_cal_tsc2 = tsc;
672  		if (pm < lapic_cal_pm1)
673  			pm += ACPI_PM_OVRRUN;
674  		lapic_cal_pm2 = pm;
675  		lapic_cal_j2 = jiffies;
676  		break;
677  	}
678  }
679  
680  static int __init
calibrate_by_pmtimer(u32 deltapm,long * delta,long * deltatsc)681  calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc)
682  {
683  	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
684  	const long pm_thresh = pm_100ms / 100;
685  	unsigned long mult;
686  	u64 res;
687  
688  #ifndef CONFIG_X86_PM_TIMER
689  	return -1;
690  #endif
691  
692  	apic_pr_verbose("... PM-Timer delta = %u\n", deltapm);
693  
694  	/* Check, if the PM timer is available */
695  	if (!deltapm)
696  		return -1;
697  
698  	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
699  
700  	if (deltapm > (pm_100ms - pm_thresh) &&
701  	    deltapm < (pm_100ms + pm_thresh)) {
702  		apic_pr_verbose("... PM-Timer result ok\n");
703  		return 0;
704  	}
705  
706  	res = (((u64)deltapm) *  mult) >> 22;
707  	do_div(res, 1000000);
708  	pr_warn("APIC calibration not consistent with PM-Timer: %ldms instead of 100ms\n",
709  		(long)res);
710  
711  	/* Correct the lapic counter value */
712  	res = (((u64)(*delta)) * pm_100ms);
713  	do_div(res, deltapm);
714  	pr_info("APIC delta adjusted to PM-Timer: "
715  		"%lu (%ld)\n", (unsigned long)res, *delta);
716  	*delta = (long)res;
717  
718  	/* Correct the tsc counter value */
719  	if (boot_cpu_has(X86_FEATURE_TSC)) {
720  		res = (((u64)(*deltatsc)) * pm_100ms);
721  		do_div(res, deltapm);
722  		apic_pr_verbose("TSC delta adjusted to PM-Timer: %lu (%ld)\n",
723  				(unsigned long)res, *deltatsc);
724  		*deltatsc = (long)res;
725  	}
726  
727  	return 0;
728  }
729  
lapic_init_clockevent(void)730  static int __init lapic_init_clockevent(void)
731  {
732  	if (!lapic_timer_period)
733  		return -1;
734  
735  	/* Calculate the scaled math multiplication factor */
736  	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
737  					TICK_NSEC, lapic_clockevent.shift);
738  	lapic_clockevent.max_delta_ns =
739  		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
740  	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
741  	lapic_clockevent.min_delta_ns =
742  		clockevent_delta2ns(0xF, &lapic_clockevent);
743  	lapic_clockevent.min_delta_ticks = 0xF;
744  
745  	return 0;
746  }
747  
apic_needs_pit(void)748  bool __init apic_needs_pit(void)
749  {
750  	/*
751  	 * If the frequencies are not known, PIT is required for both TSC
752  	 * and apic timer calibration.
753  	 */
754  	if (!tsc_khz || !cpu_khz)
755  		return true;
756  
757  	/* Is there an APIC at all or is it disabled? */
758  	if (!boot_cpu_has(X86_FEATURE_APIC) || apic_is_disabled)
759  		return true;
760  
761  	/*
762  	 * If interrupt delivery mode is legacy PIC or virtual wire without
763  	 * configuration, the local APIC timer won't be set up. Make sure
764  	 * that the PIT is initialized.
765  	 */
766  	if (apic_intr_mode == APIC_PIC ||
767  	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
768  		return true;
769  
770  	/* Virt guests may lack ARAT, but still have DEADLINE */
771  	if (!boot_cpu_has(X86_FEATURE_ARAT))
772  		return true;
773  
774  	/* Deadline timer is based on TSC so no further PIT action required */
775  	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
776  		return false;
777  
778  	/* APIC timer disabled? */
779  	if (disable_apic_timer)
780  		return true;
781  	/*
782  	 * The APIC timer frequency is known already, no PIT calibration
783  	 * required. If unknown, let the PIT be initialized.
784  	 */
785  	return lapic_timer_period == 0;
786  }
787  
calibrate_APIC_clock(void)788  static int __init calibrate_APIC_clock(void)
789  {
790  	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
791  	u64 tsc_perj = 0, tsc_start = 0;
792  	unsigned long jif_start;
793  	unsigned long deltaj;
794  	long delta, deltatsc;
795  	int pm_referenced = 0;
796  
797  	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
798  		return 0;
799  
800  	/*
801  	 * Check if lapic timer has already been calibrated by platform
802  	 * specific routine, such as tsc calibration code. If so just fill
803  	 * in the clockevent structure and return.
804  	 */
805  	if (!lapic_init_clockevent()) {
806  		apic_pr_verbose("lapic timer already calibrated %d\n", lapic_timer_period);
807  		/*
808  		 * Direct calibration methods must have an always running
809  		 * local APIC timer, no need for broadcast timer.
810  		 */
811  		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
812  		return 0;
813  	}
814  
815  	apic_pr_verbose("Using local APIC timer interrupts. Calibrating APIC timer ...\n");
816  
817  	/*
818  	 * There are platforms w/o global clockevent devices. Instead of
819  	 * making the calibration conditional on that, use a polling based
820  	 * approach everywhere.
821  	 */
822  	local_irq_disable();
823  
824  	/*
825  	 * Setup the APIC counter to maximum. There is no way the lapic
826  	 * can underflow in the 100ms detection time frame
827  	 */
828  	__setup_APIC_LVTT(0xffffffff, 0, 0);
829  
830  	/*
831  	 * Methods to terminate the calibration loop:
832  	 *  1) Global clockevent if available (jiffies)
833  	 *  2) TSC if available and frequency is known
834  	 */
835  	jif_start = READ_ONCE(jiffies);
836  
837  	if (tsc_khz) {
838  		tsc_start = rdtsc();
839  		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
840  	}
841  
842  	/*
843  	 * Enable interrupts so the tick can fire, if a global
844  	 * clockevent device is available
845  	 */
846  	local_irq_enable();
847  
848  	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
849  		/* Wait for a tick to elapse */
850  		while (1) {
851  			if (tsc_khz) {
852  				u64 tsc_now = rdtsc();
853  				if ((tsc_now - tsc_start) >= tsc_perj) {
854  					tsc_start += tsc_perj;
855  					break;
856  				}
857  			} else {
858  				unsigned long jif_now = READ_ONCE(jiffies);
859  
860  				if (time_after(jif_now, jif_start)) {
861  					jif_start = jif_now;
862  					break;
863  				}
864  			}
865  			cpu_relax();
866  		}
867  
868  		/* Invoke the calibration routine */
869  		local_irq_disable();
870  		lapic_cal_handler(NULL);
871  		local_irq_enable();
872  	}
873  
874  	local_irq_disable();
875  
876  	/* Build delta t1-t2 as apic timer counts down */
877  	delta = lapic_cal_t1 - lapic_cal_t2;
878  	apic_pr_verbose("... lapic delta = %ld\n", delta);
879  
880  	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
881  
882  	/* we trust the PM based calibration if possible */
883  	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
884  					&delta, &deltatsc);
885  
886  	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
887  	lapic_init_clockevent();
888  
889  	apic_pr_verbose("..... delta %ld\n", delta);
890  	apic_pr_verbose("..... mult: %u\n", lapic_clockevent.mult);
891  	apic_pr_verbose("..... calibration result: %u\n", lapic_timer_period);
892  
893  	if (boot_cpu_has(X86_FEATURE_TSC)) {
894  		apic_pr_verbose("..... CPU clock speed is %ld.%04ld MHz.\n",
895  				(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
896  				(deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
897  	}
898  
899  	apic_pr_verbose("..... host bus clock speed is %u.%04u MHz.\n",
900  			lapic_timer_period / (1000000 / HZ),
901  			lapic_timer_period % (1000000 / HZ));
902  
903  	/*
904  	 * Do a sanity check on the APIC calibration result
905  	 */
906  	if (lapic_timer_period < (1000000 / HZ)) {
907  		local_irq_enable();
908  		pr_warn("APIC frequency too slow, disabling apic timer\n");
909  		return -1;
910  	}
911  
912  	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
913  
914  	/*
915  	 * PM timer calibration failed or not turned on so lets try APIC
916  	 * timer based calibration, if a global clockevent device is
917  	 * available.
918  	 */
919  	if (!pm_referenced && global_clock_event) {
920  		apic_pr_verbose("... verify APIC timer\n");
921  
922  		/*
923  		 * Setup the apic timer manually
924  		 */
925  		levt->event_handler = lapic_cal_handler;
926  		lapic_timer_set_periodic(levt);
927  		lapic_cal_loops = -1;
928  
929  		/* Let the interrupts run */
930  		local_irq_enable();
931  
932  		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
933  			cpu_relax();
934  
935  		/* Stop the lapic timer */
936  		local_irq_disable();
937  		lapic_timer_shutdown(levt);
938  
939  		/* Jiffies delta */
940  		deltaj = lapic_cal_j2 - lapic_cal_j1;
941  		apic_pr_verbose("... jiffies delta = %lu\n", deltaj);
942  
943  		/* Check, if the jiffies result is consistent */
944  		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
945  			apic_pr_verbose("... jiffies result ok\n");
946  		else
947  			levt->features |= CLOCK_EVT_FEAT_DUMMY;
948  	}
949  	local_irq_enable();
950  
951  	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
952  		pr_warn("APIC timer disabled due to verification failure\n");
953  		return -1;
954  	}
955  
956  	return 0;
957  }
958  
959  /*
960   * Setup the boot APIC
961   *
962   * Calibrate and verify the result.
963   */
setup_boot_APIC_clock(void)964  void __init setup_boot_APIC_clock(void)
965  {
966  	/*
967  	 * The local apic timer can be disabled via the kernel
968  	 * commandline or from the CPU detection code. Register the lapic
969  	 * timer as a dummy clock event source on SMP systems, so the
970  	 * broadcast mechanism is used. On UP systems simply ignore it.
971  	 */
972  	if (disable_apic_timer) {
973  		pr_info("Disabling APIC timer\n");
974  		/* No broadcast on UP ! */
975  		if (num_possible_cpus() > 1) {
976  			lapic_clockevent.mult = 1;
977  			setup_APIC_timer();
978  		}
979  		return;
980  	}
981  
982  	if (calibrate_APIC_clock()) {
983  		/* No broadcast on UP ! */
984  		if (num_possible_cpus() > 1)
985  			setup_APIC_timer();
986  		return;
987  	}
988  
989  	/*
990  	 * If nmi_watchdog is set to IO_APIC, we need the
991  	 * PIT/HPET going.  Otherwise register lapic as a dummy
992  	 * device.
993  	 */
994  	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
995  
996  	/* Setup the lapic or request the broadcast */
997  	setup_APIC_timer();
998  	amd_e400_c1e_apic_setup();
999  }
1000  
/*
 * Set up the local APIC timer on the calling CPU without calibrating it
 * again, then apply the AMD E400 C1E APIC setup.
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1006  
1007  /*
1008   * The guts of the apic timer interrupt
1009   */
local_apic_timer_interrupt(void)1010  static void local_apic_timer_interrupt(void)
1011  {
1012  	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1013  
1014  	/*
1015  	 * Normally we should not be here till LAPIC has been initialized but
1016  	 * in some cases like kdump, its possible that there is a pending LAPIC
1017  	 * timer interrupt from previous kernel's context and is delivered in
1018  	 * new kernel the moment interrupts are enabled.
1019  	 *
1020  	 * Interrupts are enabled early and LAPIC is setup much later, hence
1021  	 * its possible that when we get here evt->event_handler is NULL.
1022  	 * Check for event_handler being NULL and discard the interrupt as
1023  	 * spurious.
1024  	 */
1025  	if (!evt->event_handler) {
1026  		pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
1027  			smp_processor_id());
1028  		/* Switch it off */
1029  		lapic_timer_shutdown(evt);
1030  		return;
1031  	}
1032  
1033  	/*
1034  	 * the NMI deadlock-detector uses this.
1035  	 */
1036  	inc_irq_stat(apic_timer_irqs);
1037  
1038  	evt->event_handler(evt);
1039  }
1040  
/*
 * Local APIC timer interrupt. This is the most natural way for doing
 * local interrupts, but local timer interrupts can be emulated by
 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we call the local
 *   interrupt as well. Thus we cannot inline the local irq ... ]
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
{
	/* Install this interrupt's register frame; the old one is restored on exit */
	struct pt_regs *old_regs = set_irq_regs(regs);

	/* The EOI is issued before the timer handler runs */
	apic_eoi();
	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
	local_apic_timer_interrupt();
	trace_local_timer_exit(LOCAL_TIMER_VECTOR);

	set_irq_regs(old_regs);
}
1060  
1061  /*
1062   * Local APIC start and shutdown
1063   */
1064  
/**
 * clear_local_APIC - shutdown the local APIC
 *
 * This is called when a CPU is disabled and before rebooting, so the state
 * of the local APIC has no dangling leftovers. Also used to clean out any
 * BIOS leftovers during boot.
 *
 * Does nothing when the APIC is not accessible. Otherwise all LVT entries
 * handled here are masked first and the timer, LINT0, LINT1, error and
 * performance counter entries are then reset to a plain masked state.
 */
void clear_local_APIC(void)
{
	int maxlvt;
	u32 v;

	if (!apic_accessible())
		return;

	/* maxlvt decides which of the optional LVT entries below are touched */
	maxlvt = lapic_get_maxlvt();
	/*
	 * Masking an LVT entry can trigger a local APIC error
	 * if the vector is zero. Mask LVTERR first to prevent this.
	 */
	if (maxlvt >= 3) {
		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
	}
	/*
	 * Careful: we have to set masks only first to deassert
	 * any level-triggered sources.
	 */
	v = apic_read(APIC_LVTT);
	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT0);
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT1);
	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
	if (maxlvt >= 4) {
		v = apic_read(APIC_LVTPC);
		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
	}

	/* Let's not touch this if we didn't frob it */
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5) {
		v = apic_read(APIC_LVTTHMR);
		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
	}
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6) {
		v = apic_read(APIC_LVTCMCI);
		/* Only write the entry when it is not masked already */
		if (!(v & APIC_LVT_MASKED))
			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
	}
#endif

	/*
	 * Clean APIC state for other OSs: everything but the mask bit is
	 * cleared in these entries.
	 */
	apic_write(APIC_LVTT, APIC_LVT_MASKED);
	apic_write(APIC_LVT0, APIC_LVT_MASKED);
	apic_write(APIC_LVT1, APIC_LVT_MASKED);
	if (maxlvt >= 3)
		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, APIC_LVT_MASKED);

	/* Integrated APIC (!82489DX) ? */
	if (lapic_is_integrated()) {
		if (maxlvt > 3)
			/* Clear ESR due to Pentium errata 3AP and 11AP */
			apic_write(APIC_ESR, 0);
		/* The value read back is intentionally discarded */
		apic_read(APIC_ESR);
	}
}
1138  
1139  /**
1140   * apic_soft_disable - Clears and software disables the local APIC on hotplug
1141   *
1142   * Contrary to disable_local_APIC() this does not touch the enable bit in
1143   * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
1144   * bus would require a hardware reset as the APIC would lose track of bus
1145   * arbitration. On systems with FSB delivery APICBASE could be disabled,
1146   * but it has to be guaranteed that no interrupt is sent to the APIC while
1147   * in that state and it's not clear from the SDM whether it still responds
1148   * to INIT/SIPI messages. Stay on the safe side and use software disable.
1149   */
apic_soft_disable(void)1150  void apic_soft_disable(void)
1151  {
1152  	u32 value;
1153  
1154  	clear_local_APIC();
1155  
1156  	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
1157  	value = apic_read(APIC_SPIV);
1158  	value &= ~APIC_SPIV_APIC_ENABLED;
1159  	apic_write(APIC_SPIV, value);
1160  }
1161  
/**
 * disable_local_APIC - clear and disable the local APIC
 *
 * Does nothing when the APIC is not accessible. On 32-bit the enable bit
 * in MSR_IA32_APICBASE is cleared as well if the kernel was the one that
 * set it.
 */
void disable_local_APIC(void)
{
	if (!apic_accessible())
		return;

	apic_soft_disable();

#ifdef CONFIG_X86_32
	/*
	 * The BIOS had the LAPIC disabled and the kernel enabled it:
	 * put it back into the disabled state.
	 */
	if (enabled_via_apicbase) {
		unsigned int lo, hi;

		rdmsr(MSR_IA32_APICBASE, lo, hi);
		lo &= ~MSR_IA32_APICBASE_ENABLE;
		wrmsr(MSR_IA32_APICBASE, lo, hi);
	}
#endif
}
1186  
1187  /*
1188   * If Linux enabled the LAPIC against the BIOS default disable it down before
1189   * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1190   * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1191   * for the case where Linux didn't enable the LAPIC.
1192   */
lapic_shutdown(void)1193  void lapic_shutdown(void)
1194  {
1195  	unsigned long flags;
1196  
1197  	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1198  		return;
1199  
1200  	local_irq_save(flags);
1201  
1202  #ifdef CONFIG_X86_32
1203  	if (!enabled_via_apicbase)
1204  		clear_local_APIC();
1205  	else
1206  #endif
1207  		disable_local_APIC();
1208  
1209  
1210  	local_irq_restore(flags);
1211  }
1212  
1213  /**
1214   * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1215   */
sync_Arb_IDs(void)1216  void __init sync_Arb_IDs(void)
1217  {
1218  	/*
1219  	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1220  	 * needed on AMD.
1221  	 */
1222  	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1223  		return;
1224  
1225  	/*
1226  	 * Wait for idle.
1227  	 */
1228  	apic_wait_icr_idle();
1229  
1230  	apic_pr_debug("Synchronizing Arb IDs.\n");
1231  	apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
1232  }
1233  
1234  enum apic_intr_mode_id apic_intr_mode __ro_after_init;
1235  
__apic_intr_mode_select(void)1236  static int __init __apic_intr_mode_select(void)
1237  {
1238  	/* Check kernel option */
1239  	if (apic_is_disabled) {
1240  		pr_info("APIC disabled via kernel command line\n");
1241  		return APIC_PIC;
1242  	}
1243  
1244  	/* Check BIOS */
1245  #ifdef CONFIG_X86_64
1246  	/* On 64-bit, the APIC must be integrated, Check local APIC only */
1247  	if (!boot_cpu_has(X86_FEATURE_APIC)) {
1248  		apic_is_disabled = true;
1249  		pr_info("APIC disabled by BIOS\n");
1250  		return APIC_PIC;
1251  	}
1252  #else
1253  	/* On 32-bit, the APIC may be integrated APIC or 82489DX */
1254  
1255  	/* Neither 82489DX nor integrated APIC ? */
1256  	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
1257  		apic_is_disabled = true;
1258  		return APIC_PIC;
1259  	}
1260  
1261  	/* If the BIOS pretends there is an integrated APIC ? */
1262  	if (!boot_cpu_has(X86_FEATURE_APIC) &&
1263  		APIC_INTEGRATED(boot_cpu_apic_version)) {
1264  		apic_is_disabled = true;
1265  		pr_err(FW_BUG "Local APIC not detected, force emulation\n");
1266  		return APIC_PIC;
1267  	}
1268  #endif
1269  
1270  	/* Check MP table or ACPI MADT configuration */
1271  	if (!smp_found_config) {
1272  		disable_ioapic_support();
1273  		if (!acpi_lapic) {
1274  			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
1275  			return APIC_VIRTUAL_WIRE_NO_CONFIG;
1276  		}
1277  		return APIC_VIRTUAL_WIRE;
1278  	}
1279  
1280  #ifdef CONFIG_SMP
1281  	/* If SMP should be disabled, then really disable it! */
1282  	if (!setup_max_cpus) {
1283  		pr_info("APIC: SMP mode deactivated\n");
1284  		return APIC_SYMMETRIC_IO_NO_ROUTING;
1285  	}
1286  #endif
1287  
1288  	return APIC_SYMMETRIC_IO;
1289  }
1290  
/*
 * Select the interrupt delivery mode for the BSP.
 *
 * Stores the result of __apic_intr_mode_select() in apic_intr_mode.
 */
void __init apic_intr_mode_select(void)
{
	apic_intr_mode = __apic_intr_mode_select();
}
1296  
1297  /*
1298   * An initial setup of the virtual wire mode.
1299   */
init_bsp_APIC(void)1300  void __init init_bsp_APIC(void)
1301  {
1302  	unsigned int value;
1303  
1304  	/*
1305  	 * Don't do the setup now if we have a SMP BIOS as the
1306  	 * through-I/O-APIC virtual wire mode might be active.
1307  	 */
1308  	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1309  		return;
1310  
1311  	/*
1312  	 * Do not trust the local APIC being empty at bootup.
1313  	 */
1314  	clear_local_APIC();
1315  
1316  	/*
1317  	 * Enable APIC.
1318  	 */
1319  	value = apic_read(APIC_SPIV);
1320  	value &= ~APIC_VECTOR_MASK;
1321  	value |= APIC_SPIV_APIC_ENABLED;
1322  
1323  #ifdef CONFIG_X86_32
1324  	/* This bit is reserved on P4/Xeon and should be cleared */
1325  	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1326  	    (boot_cpu_data.x86 == 15))
1327  		value &= ~APIC_SPIV_FOCUS_DISABLED;
1328  	else
1329  #endif
1330  		value |= APIC_SPIV_FOCUS_DISABLED;
1331  	value |= SPURIOUS_APIC_VECTOR;
1332  	apic_write(APIC_SPIV, value);
1333  
1334  	/*
1335  	 * Set up the virtual wire mode.
1336  	 */
1337  	apic_write(APIC_LVT0, APIC_DM_EXTINT);
1338  	value = APIC_DM_NMI;
1339  	if (!lapic_is_integrated())		/* 82489DX */
1340  		value |= APIC_LVT_LEVEL_TRIGGER;
1341  	if (apic_extnmi == APIC_EXTNMI_NONE)
1342  		value |= APIC_LVT_MASKED;
1343  	apic_write(APIC_LVT1, value);
1344  }
1345  
1346  static void __init apic_bsp_setup(bool upmode);
1347  
1348  /* Init the interrupt delivery mode for the BSP */
apic_intr_mode_init(void)1349  void __init apic_intr_mode_init(void)
1350  {
1351  	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
1352  
1353  	switch (apic_intr_mode) {
1354  	case APIC_PIC:
1355  		pr_info("APIC: Keep in PIC mode(8259)\n");
1356  		return;
1357  	case APIC_VIRTUAL_WIRE:
1358  		pr_info("APIC: Switch to virtual wire mode setup\n");
1359  		break;
1360  	case APIC_VIRTUAL_WIRE_NO_CONFIG:
1361  		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
1362  		upmode = true;
1363  		break;
1364  	case APIC_SYMMETRIC_IO:
1365  		pr_info("APIC: Switch to symmetric I/O mode setup\n");
1366  		break;
1367  	case APIC_SYMMETRIC_IO_NO_ROUTING:
1368  		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
1369  		break;
1370  	}
1371  
1372  	x86_64_probe_apic();
1373  
1374  	x86_32_install_bigsmp();
1375  
1376  	if (x86_platform.apic_post_init)
1377  		x86_platform.apic_post_init();
1378  
1379  	apic_bsp_setup(upmode);
1380  }
1381  
lapic_setup_esr(void)1382  static void lapic_setup_esr(void)
1383  {
1384  	unsigned int oldvalue, value, maxlvt;
1385  
1386  	if (!lapic_is_integrated()) {
1387  		pr_info("No ESR for 82489DX.\n");
1388  		return;
1389  	}
1390  
1391  	if (apic->disable_esr) {
1392  		/*
1393  		 * Something untraceable is creating bad interrupts on
1394  		 * secondary quads ... for the moment, just leave the
1395  		 * ESR disabled - we can't do anything useful with the
1396  		 * errors anyway - mbligh
1397  		 */
1398  		pr_info("Leaving ESR disabled.\n");
1399  		return;
1400  	}
1401  
1402  	maxlvt = lapic_get_maxlvt();
1403  	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
1404  		apic_write(APIC_ESR, 0);
1405  	oldvalue = apic_read(APIC_ESR);
1406  
1407  	/* enables sending errors */
1408  	value = ERROR_APIC_VECTOR;
1409  	apic_write(APIC_LVTERR, value);
1410  
1411  	/*
1412  	 * spec says clear errors after enabling vector.
1413  	 */
1414  	if (maxlvt > 3)
1415  		apic_write(APIC_ESR, 0);
1416  	value = apic_read(APIC_ESR);
1417  	if (value != oldvalue) {
1418  		apic_pr_verbose("ESR value before enabling vector: 0x%08x  after: 0x%08x\n",
1419  				oldvalue, value);
1420  	}
1421  }
1422  
/*
 * Storage for one bank of APIC_IR_REGS 32-bit APIC registers, sized in
 * registers, in bits and in unsigned longs.
 */
#define APIC_IR_REGS		APIC_ISR_NR
#define APIC_IR_BITS		(APIC_IR_REGS * 32)
#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)

/*
 * The same storage viewed either as individual 32-bit register values or
 * as one bitmap of APIC_IR_BITS bits.
 */
union apic_ir {
	unsigned long	map[APIC_IR_MAPSIZE];
	u32		regs[APIC_IR_REGS];
};
1431  
apic_check_and_ack(union apic_ir * irr,union apic_ir * isr)1432  static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
1433  {
1434  	int i, bit;
1435  
1436  	/* Read the IRRs */
1437  	for (i = 0; i < APIC_IR_REGS; i++)
1438  		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
1439  
1440  	/* Read the ISRs */
1441  	for (i = 0; i < APIC_IR_REGS; i++)
1442  		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
1443  
1444  	/*
1445  	 * If the ISR map is not empty. ACK the APIC and run another round
1446  	 * to verify whether a pending IRR has been unblocked and turned
1447  	 * into a ISR.
1448  	 */
1449  	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
1450  		/*
1451  		 * There can be multiple ISR bits set when a high priority
1452  		 * interrupt preempted a lower priority one. Issue an ACK
1453  		 * per set bit.
1454  		 */
1455  		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
1456  			apic_eoi();
1457  		return true;
1458  	}
1459  
1460  	return !bitmap_empty(irr->map, APIC_IR_BITS);
1461  }
1462  
1463  /*
1464   * After a crash, we no longer service the interrupts and a pending
1465   * interrupt from previous kernel might still have ISR bit set.
1466   *
1467   * Most probably by now the CPU has serviced that pending interrupt and it
1468   * might not have done the apic_eoi() because it thought, interrupt
1469   * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
1470   * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
1471   * a vector might get locked. It was noticed for timer irq (vector
1472   * 0x31). Issue an extra EOI to clear ISR.
1473   *
1474   * If there are pending IRR bits they turn into ISR bits after a higher
1475   * priority ISR bit has been acked.
1476   */
apic_pending_intr_clear(void)1477  static void apic_pending_intr_clear(void)
1478  {
1479  	union apic_ir irr, isr;
1480  	unsigned int i;
1481  
1482  	/* 512 loops are way oversized and give the APIC a chance to obey. */
1483  	for (i = 0; i < 512; i++) {
1484  		if (!apic_check_and_ack(&irr, &isr))
1485  			return;
1486  	}
1487  	/* Dump the IRR/ISR content if that failed */
1488  	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
1489  }
1490  
/**
 * setup_local_APIC - setup the local APIC
 *
 * Used to setup local APIC while initializing BSP or bringing up APs.
 * Always called with preemption disabled.
 *
 * The sequence is: soft disable the APIC, set up the logical destination
 * (if the driver provides a hook) and the task priority, clear stale
 * ISR/IRR bits, enable the APIC with the spurious vector and finally
 * program LVT0 (ExtINT) and LVT1 (NMI).
 */
static void setup_local_APIC(void)
{
	int cpu = smp_processor_id();
	unsigned int value;

	/* With the APIC disabled only the IO-APIC support is turned off */
	if (apic_is_disabled) {
		disable_ioapic_support();
		return;
	}

	/*
	 * If this comes from kexec/kcrash the APIC might be enabled in
	 * SPIV. Soft disable it before doing further initialization.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);

#ifdef CONFIG_X86_32
	/* Pound the ESR really hard over the head with a big hammer - mbligh */
	if (lapic_is_integrated() && apic->disable_esr) {
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
	}
#endif
	/*
	 * Intel recommends to set DFR, LDR and TPR before enabling
	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
	 * document number 292116).
	 *
	 * Except for APICs which operate in physical destination mode.
	 */
	if (apic->init_apic_ldr)
		apic->init_apic_ldr();

	/*
	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
	 * vector in the 16-31 range could be delivered if TPR == 0, but we
	 * would think it's an exception and terrible things will happen.  We
	 * never change this later on.
	 */
	value = apic_read(APIC_TASKPRI);
	value &= ~APIC_TPRI_MASK;
	value |= 0x10;
	apic_write(APIC_TASKPRI, value);

	/* Clear possibly stale ISR/IRR bits */
	apic_pending_intr_clear();

	/*
	 * Now that we are all set up, enable the APIC
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	/*
	 * Enable APIC
	 */
	value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
	/*
	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
	 * certain networking cards. If high frequency interrupts are
	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
	 * entry is masked/unmasked at a high rate as well then sooner or
	 * later IOAPIC line gets 'stuck', no more interrupts are received
	 * from the device. If focus CPU is disabled then the hang goes
	 * away, oh well :-(
	 *
	 * [ This bug can be reproduced easily with a level-triggered
	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
	 *   BX chipset. ]
	 */
	/*
	 * Actually disabling the focus CPU check just makes the hang less
	 * frequent as it makes the interrupt distribution model be more
	 * like LRU than MRU (the short-term load is more even across CPUs).
	 */

	/*
	 * - enable focus processor (bit==0)
	 * - 64bit mode always use processor focus
	 *   so no need to set it
	 */
	value &= ~APIC_SPIV_FOCUS_DISABLED;
#endif

	/*
	 * Set spurious IRQ vector
	 */
	value |= SPURIOUS_APIC_VECTOR;
	apic_write(APIC_SPIV, value);

	perf_events_lapic_init();

	/*
	 * Set up LVT0, LVT1:
	 *
	 * set up through-local-APIC on the boot CPU's LINT0. This is not
	 * strictly necessary in pure symmetric-IO mode, but sometimes
	 * we delegate interrupts to the 8259A.
	 */
	/*
	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
	 */
	/* Only the mask bit of the current LVT0 content is of interest */
	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
	/*
	 * ExtINT stays unmasked only on CPU 0, and only in PIC mode, when
	 * LVT0 was found unmasked or when the IO-APIC is disabled.
	 */
	if (!cpu && (pic_mode || !value || ioapic_is_disabled)) {
		value = APIC_DM_EXTINT;
		apic_pr_verbose("Enabled ExtINT on CPU#%d\n", cpu);
	} else {
		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
		apic_pr_verbose("Masked ExtINT on CPU#%d\n", cpu);
	}
	apic_write(APIC_LVT0, value);

	/*
	 * Only the BSP sees the LINT1 NMI signal by default. This can be
	 * modified by apic_extnmi= boot option.
	 */
	if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
	    apic_extnmi == APIC_EXTNMI_ALL)
		value = APIC_DM_NMI;
	else
		value = APIC_DM_NMI | APIC_LVT_MASKED;

	/* Is 82489DX ? */
	if (!lapic_is_integrated())
		value |= APIC_LVT_LEVEL_TRIGGER;
	apic_write(APIC_LVT1, value);

#ifdef CONFIG_X86_MCE_INTEL
	/* Recheck CMCI information after local APIC is up on CPU #0 */
	if (!cpu)
		cmci_recheck();
#endif
}
1635  
/*
 * Finish the local APIC setup: set up error reporting, mask the timer
 * LVT entry on 32-bit and activate APIC power management handling.
 */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		unsigned int lvtt = apic_read(APIC_LVTT);

		/* Disable the local apic timer */
		apic_write(APIC_LVTT,
			   lvtt | (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR));
	}
#endif

	apic_pm_activate();
}
1652  
/*
 * APIC setup function for application processors. Called from smpboot.c
 *
 * Runs the main local APIC setup followed by the finishing steps.
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}
1661  
apic_read_boot_cpu_id(bool x2apic)1662  static __init void apic_read_boot_cpu_id(bool x2apic)
1663  {
1664  	/*
1665  	 * This can be invoked from check_x2apic() before the APIC has been
1666  	 * selected. But that code knows for sure that the BIOS enabled
1667  	 * X2APIC.
1668  	 */
1669  	if (x2apic) {
1670  		boot_cpu_physical_apicid = native_apic_msr_read(APIC_ID);
1671  		boot_cpu_apic_version = GET_APIC_VERSION(native_apic_msr_read(APIC_LVR));
1672  	} else {
1673  		boot_cpu_physical_apicid = read_apic_id();
1674  		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
1675  	}
1676  	topology_register_boot_apic(boot_cpu_physical_apicid);
1677  	x86_32_probe_bigsmp_early();
1678  }
1679  
1680  #ifdef CONFIG_X86_X2APIC
/* Set to 1 while x2APIC mode is in use, 0 otherwise */
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

/*
 * Values of x2apic_state. The order matters: the code compares against
 * X2APIC_ON to tell the enabled states from the others.
 */
enum {
	X2APIC_OFF,
	X2APIC_DISABLED,
	/* All states below here have X2APIC enabled */
	X2APIC_ON,
	X2APIC_ON_LOCKED
};
static int x2apic_state;
1692  
x2apic_hw_locked(void)1693  static bool x2apic_hw_locked(void)
1694  {
1695  	u64 x86_arch_cap_msr;
1696  	u64 msr;
1697  
1698  	x86_arch_cap_msr = x86_read_arch_cap_msr();
1699  	if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
1700  		rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
1701  		return (msr & LEGACY_XAPIC_DISABLED);
1702  	}
1703  	return false;
1704  }
1705  
__x2apic_disable(void)1706  static void __x2apic_disable(void)
1707  {
1708  	u64 msr;
1709  
1710  	if (!boot_cpu_has(X86_FEATURE_APIC))
1711  		return;
1712  
1713  	rdmsrl(MSR_IA32_APICBASE, msr);
1714  	if (!(msr & X2APIC_ENABLE))
1715  		return;
1716  	/* Disable xapic and x2apic first and then reenable xapic mode */
1717  	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1718  	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1719  	printk_once(KERN_INFO "x2apic disabled\n");
1720  }
1721  
__x2apic_enable(void)1722  static void __x2apic_enable(void)
1723  {
1724  	u64 msr;
1725  
1726  	rdmsrl(MSR_IA32_APICBASE, msr);
1727  	if (msr & X2APIC_ENABLE)
1728  		return;
1729  	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1730  	printk_once(KERN_INFO "x2apic enabled\n");
1731  }
1732  
setup_nox2apic(char * str)1733  static int __init setup_nox2apic(char *str)
1734  {
1735  	if (x2apic_enabled()) {
1736  		u32 apicid = native_apic_msr_read(APIC_ID);
1737  
1738  		if (apicid >= 255) {
1739  			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
1740  				apicid);
1741  			return 0;
1742  		}
1743  		if (x2apic_hw_locked()) {
1744  			pr_warn("APIC locked in x2apic mode, can't disable\n");
1745  			return 0;
1746  		}
1747  		pr_warn("x2apic already enabled.\n");
1748  		__x2apic_disable();
1749  	}
1750  	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1751  	x2apic_state = X2APIC_DISABLED;
1752  	x2apic_mode = 0;
1753  	return 0;
1754  }
1755  early_param("nox2apic", setup_nox2apic);
1756  
1757  /* Called from cpu_init() to enable x2apic on (secondary) cpus */
x2apic_setup(void)1758  void x2apic_setup(void)
1759  {
1760  	/*
1761  	 * Try to make the AP's APIC state match that of the BSP,  but if the
1762  	 * BSP is unlocked and the AP is locked then there is a state mismatch.
1763  	 * Warn about the mismatch in case a GP fault occurs due to a locked AP
1764  	 * trying to be turned off.
1765  	 */
1766  	if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked())
1767  		pr_warn("x2apic lock mismatch between BSP and AP.\n");
1768  	/*
1769  	 * If x2apic is not in ON or LOCKED state, disable it if already enabled
1770  	 * from BIOS.
1771  	 */
1772  	if (x2apic_state < X2APIC_ON) {
1773  		__x2apic_disable();
1774  		return;
1775  	}
1776  	__x2apic_enable();
1777  }
1778  
1779  static __init void apic_set_fixmap(bool read_apic);
1780  
x2apic_disable(void)1781  static __init void x2apic_disable(void)
1782  {
1783  	u32 x2apic_id;
1784  
1785  	if (x2apic_state < X2APIC_ON)
1786  		return;
1787  
1788  	x2apic_id = read_apic_id();
1789  	if (x2apic_id >= 255)
1790  		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1791  
1792  	if (x2apic_hw_locked()) {
1793  		pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id);
1794  		return;
1795  	}
1796  
1797  	__x2apic_disable();
1798  
1799  	x2apic_mode = 0;
1800  	x2apic_state = X2APIC_DISABLED;
1801  
1802  	/*
1803  	 * Don't reread the APIC ID as it was already done from
1804  	 * check_x2apic() and the APIC driver still is a x2APIC variant,
1805  	 * which fails to do the read after x2APIC was disabled.
1806  	 */
1807  	apic_set_fixmap(false);
1808  }
1809  
x2apic_enable(void)1810  static __init void x2apic_enable(void)
1811  {
1812  	if (x2apic_state != X2APIC_OFF)
1813  		return;
1814  
1815  	x2apic_mode = 1;
1816  	x2apic_state = X2APIC_ON;
1817  	__x2apic_enable();
1818  }
1819  
try_to_enable_x2apic(int remap_mode)1820  static __init void try_to_enable_x2apic(int remap_mode)
1821  {
1822  	if (x2apic_state == X2APIC_DISABLED)
1823  		return;
1824  
1825  	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
1826  		u32 apic_limit = 255;
1827  
1828  		/*
1829  		 * Using X2APIC without IR is not architecturally supported
1830  		 * on bare metal but may be supported in guests.
1831  		 */
1832  		if (!x86_init.hyper.x2apic_available()) {
1833  			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
1834  			x2apic_disable();
1835  			return;
1836  		}
1837  
1838  		/*
1839  		 * If the hypervisor supports extended destination ID in
1840  		 * MSI, that increases the maximum APIC ID that can be
1841  		 * used for non-remapped IRQ domains.
1842  		 */
1843  		if (x86_init.hyper.msi_ext_dest_id()) {
1844  			virt_ext_dest_id = 1;
1845  			apic_limit = 32767;
1846  		}
1847  
1848  		/*
1849  		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
1850  		 * in physical mode, and CPUs with an APIC ID that cannot
1851  		 * be addressed must not be brought online.
1852  		 */
1853  		x2apic_set_max_apicid(apic_limit);
1854  		x2apic_phys = 1;
1855  	}
1856  	x2apic_enable();
1857  }
1858  
check_x2apic(void)1859  void __init check_x2apic(void)
1860  {
1861  	if (x2apic_enabled()) {
1862  		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
1863  		x2apic_mode = 1;
1864  		if (x2apic_hw_locked())
1865  			x2apic_state = X2APIC_ON_LOCKED;
1866  		else
1867  			x2apic_state = X2APIC_ON;
1868  		apic_read_boot_cpu_id(true);
1869  	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
1870  		x2apic_state = X2APIC_DISABLED;
1871  	}
1872  }
1873  #else /* CONFIG_X86_X2APIC */
check_x2apic(void)1874  void __init check_x2apic(void)
1875  {
1876  	if (!apic_is_x2apic_enabled())
1877  		return;
1878  	/*
1879  	 * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC?
1880  	 */
1881  	pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
1882  	pr_err("Disabling APIC, expect reduced performance and functionality.\n");
1883  
1884  	apic_is_disabled = true;
1885  	setup_clear_cpu_cap(X86_FEATURE_APIC);
1886  }
1887  
/* Empty stand-ins for kernels built without CONFIG_X86_X2APIC */
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
1890  #endif /* !CONFIG_X86_X2APIC */
1891  
enable_IR_x2apic(void)1892  void __init enable_IR_x2apic(void)
1893  {
1894  	unsigned long flags;
1895  	int ret, ir_stat;
1896  
1897  	if (ioapic_is_disabled) {
1898  		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
1899  		return;
1900  	}
1901  
1902  	ir_stat = irq_remapping_prepare();
1903  	if (ir_stat < 0 && !x2apic_supported())
1904  		return;
1905  
1906  	ret = save_ioapic_entries();
1907  	if (ret) {
1908  		pr_info("Saving IO-APIC state failed: %d\n", ret);
1909  		return;
1910  	}
1911  
1912  	local_irq_save(flags);
1913  	legacy_pic->mask_all();
1914  	mask_ioapic_entries();
1915  
1916  	/* If irq_remapping_prepare() succeeded, try to enable it */
1917  	if (ir_stat >= 0)
1918  		ir_stat = irq_remapping_enable();
1919  	/* ir_stat contains the remap mode or an error code */
1920  	try_to_enable_x2apic(ir_stat);
1921  
1922  	if (ir_stat < 0)
1923  		restore_ioapic_entries();
1924  	legacy_pic->restore_mask();
1925  	local_irq_restore(flags);
1926  }
1927  
1928  #ifdef CONFIG_X86_64
1929  /*
1930   * Detect and enable local APICs on non-SMP boards.
1931   * Original code written by Keir Fraser.
1932   * On AMD64 we trust the BIOS - if it says no APIC it is likely
1933   * not correctly set up (usually the APIC timer won't work etc.)
1934   */
detect_init_APIC(void)1935  static bool __init detect_init_APIC(void)
1936  {
1937  	if (!boot_cpu_has(X86_FEATURE_APIC)) {
1938  		pr_info("No local APIC present\n");
1939  		return false;
1940  	}
1941  
1942  	register_lapic_address(APIC_DEFAULT_PHYS_BASE);
1943  	return true;
1944  }
1945  #else
1946  
apic_verify(unsigned long addr)1947  static bool __init apic_verify(unsigned long addr)
1948  {
1949  	u32 features, h, l;
1950  
1951  	/*
1952  	 * The APIC feature bit should now be enabled
1953  	 * in `cpuid'
1954  	 */
1955  	features = cpuid_edx(1);
1956  	if (!(features & (1 << X86_FEATURE_APIC))) {
1957  		pr_warn("Could not enable APIC!\n");
1958  		return false;
1959  	}
1960  	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1961  
1962  	/* The BIOS may have set up the APIC at some other address */
1963  	if (boot_cpu_data.x86 >= 6) {
1964  		rdmsr(MSR_IA32_APICBASE, l, h);
1965  		if (l & MSR_IA32_APICBASE_ENABLE)
1966  			addr = l & MSR_IA32_APICBASE_BASE;
1967  	}
1968  
1969  	register_lapic_address(addr);
1970  	pr_info("Found and enabled local APIC!\n");
1971  	return true;
1972  }
1973  
apic_force_enable(unsigned long addr)1974  bool __init apic_force_enable(unsigned long addr)
1975  {
1976  	u32 h, l;
1977  
1978  	if (apic_is_disabled)
1979  		return false;
1980  
1981  	/*
1982  	 * Some BIOSes disable the local APIC in the APIC_BASE
1983  	 * MSR. This can only be done in software for Intel P6 or later
1984  	 * and AMD K7 (Model > 1) or later.
1985  	 */
1986  	if (boot_cpu_data.x86 >= 6) {
1987  		rdmsr(MSR_IA32_APICBASE, l, h);
1988  		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1989  			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1990  			l &= ~MSR_IA32_APICBASE_BASE;
1991  			l |= MSR_IA32_APICBASE_ENABLE | addr;
1992  			wrmsr(MSR_IA32_APICBASE, l, h);
1993  			enabled_via_apicbase = 1;
1994  		}
1995  	}
1996  	return apic_verify(addr);
1997  }
1998  
1999  /*
2000   * Detect and initialize APIC
2001   */
detect_init_APIC(void)2002  static bool __init detect_init_APIC(void)
2003  {
2004  	/* Disabled by kernel option? */
2005  	if (apic_is_disabled)
2006  		return false;
2007  
2008  	switch (boot_cpu_data.x86_vendor) {
2009  	case X86_VENDOR_AMD:
2010  		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
2011  		    (boot_cpu_data.x86 >= 15))
2012  			break;
2013  		goto no_apic;
2014  	case X86_VENDOR_HYGON:
2015  		break;
2016  	case X86_VENDOR_INTEL:
2017  		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
2018  		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
2019  			break;
2020  		goto no_apic;
2021  	default:
2022  		goto no_apic;
2023  	}
2024  
2025  	if (!boot_cpu_has(X86_FEATURE_APIC)) {
2026  		/*
2027  		 * Over-ride BIOS and try to enable the local APIC only if
2028  		 * "lapic" specified.
2029  		 */
2030  		if (!force_enable_local_apic) {
2031  			pr_info("Local APIC disabled by BIOS -- "
2032  				"you can enable it with \"lapic\"\n");
2033  			return false;
2034  		}
2035  		if (!apic_force_enable(APIC_DEFAULT_PHYS_BASE))
2036  			return false;
2037  	} else {
2038  		if (!apic_verify(APIC_DEFAULT_PHYS_BASE))
2039  			return false;
2040  	}
2041  
2042  	apic_pm_activate();
2043  
2044  	return true;
2045  
2046  no_apic:
2047  	pr_info("No local APIC present or hardware disabled\n");
2048  	return false;
2049  }
2050  #endif
2051  
2052  /**
2053   * init_apic_mappings - initialize APIC mappings
2054   */
init_apic_mappings(void)2055  void __init init_apic_mappings(void)
2056  {
2057  	if (apic_validate_deadline_timer())
2058  		pr_info("TSC deadline timer available\n");
2059  
2060  	if (x2apic_mode)
2061  		return;
2062  
2063  	if (!smp_found_config) {
2064  		if (!detect_init_APIC()) {
2065  			pr_info("APIC: disable apic facility\n");
2066  			apic_disable();
2067  		}
2068  	}
2069  }
2070  
apic_set_fixmap(bool read_apic)2071  static __init void apic_set_fixmap(bool read_apic)
2072  {
2073  	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
2074  	apic_mmio_base = APIC_BASE;
2075  	apic_pr_verbose("Mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr);
2076  	if (read_apic)
2077  		apic_read_boot_cpu_id(false);
2078  }
2079  
register_lapic_address(unsigned long address)2080  void __init register_lapic_address(unsigned long address)
2081  {
2082  	/* This should only happen once */
2083  	WARN_ON_ONCE(mp_lapic_addr);
2084  	mp_lapic_addr = address;
2085  
2086  	if (!x2apic_mode)
2087  		apic_set_fixmap(true);
2088  }
2089  
2090  /*
2091   * Local APIC interrupts
2092   */
2093  
2094  /*
2095   * Common handling code for spurious_interrupt and spurious_vector entry
2096   * points below. No point in allowing the compiler to inline it twice.
2097   */
handle_spurious_interrupt(u8 vector)2098  static noinline void handle_spurious_interrupt(u8 vector)
2099  {
2100  	u32 v;
2101  
2102  	trace_spurious_apic_entry(vector);
2103  
2104  	inc_irq_stat(irq_spurious_count);
2105  
2106  	/*
2107  	 * If this is a spurious interrupt then do not acknowledge
2108  	 */
2109  	if (vector == SPURIOUS_APIC_VECTOR) {
2110  		/* See SDM vol 3 */
2111  		pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2112  			smp_processor_id());
2113  		goto out;
2114  	}
2115  
2116  	/*
2117  	 * If it is a vectored one, verify it's set in the ISR. If set,
2118  	 * acknowledge it.
2119  	 */
2120  	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2121  	if (v & (1 << (vector & 0x1f))) {
2122  		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2123  			vector, smp_processor_id());
2124  		apic_eoi();
2125  	} else {
2126  		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2127  			vector, smp_processor_id());
2128  	}
2129  out:
2130  	trace_spurious_apic_exit(vector);
2131  }
2132  
/**
 * spurious_interrupt - Catch all for interrupts raised on unused vectors
 * @regs:	Pointer to pt_regs on stack
 * @vector:	The vector number
 *
 * This is invoked from ASM entry code to catch all interrupts which
 * trigger on an entry which is routed to the common_spurious idtentry
 * point.
 *
 * All work is delegated to handle_spurious_interrupt() with @vector.
 */
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
	handle_spurious_interrupt(vector);
}
2146  
/*
 * System vector entry for the dedicated spurious APIC vector. Always passes
 * SPURIOUS_APIC_VECTOR to the common handler, which takes its "report only,
 * do not acknowledge" path for that value.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
{
	handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
}
2151  
2152  /*
2153   * This interrupt should never happen with our APIC/SMP architecture
2154   */
DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)2155  DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
2156  {
2157  	static const char * const error_interrupt_reason[] = {
2158  		"Send CS error",		/* APIC Error Bit 0 */
2159  		"Receive CS error",		/* APIC Error Bit 1 */
2160  		"Send accept error",		/* APIC Error Bit 2 */
2161  		"Receive accept error",		/* APIC Error Bit 3 */
2162  		"Redirectable IPI",		/* APIC Error Bit 4 */
2163  		"Send illegal vector",		/* APIC Error Bit 5 */
2164  		"Received illegal vector",	/* APIC Error Bit 6 */
2165  		"Illegal register address",	/* APIC Error Bit 7 */
2166  	};
2167  	u32 v, i = 0;
2168  
2169  	trace_error_apic_entry(ERROR_APIC_VECTOR);
2170  
2171  	/* First tickle the hardware, only then report what went on. -- REW */
2172  	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
2173  		apic_write(APIC_ESR, 0);
2174  	v = apic_read(APIC_ESR);
2175  	apic_eoi();
2176  	atomic_inc(&irq_err_count);
2177  
2178  	apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v);
2179  
2180  	v &= 0xff;
2181  	while (v) {
2182  		if (v & 0x1)
2183  			apic_pr_debug_cont(" : %s", error_interrupt_reason[i]);
2184  		i++;
2185  		v >>= 1;
2186  	}
2187  
2188  	apic_pr_debug_cont("\n");
2189  
2190  	trace_error_apic_exit(ERROR_APIC_VECTOR);
2191  }
2192  
2193  /**
2194   * connect_bsp_APIC - attach the APIC to the interrupt system
2195   */
connect_bsp_APIC(void)2196  static void __init connect_bsp_APIC(void)
2197  {
2198  #ifdef CONFIG_X86_32
2199  	if (pic_mode) {
2200  		/*
2201  		 * Do not trust the local APIC being empty at bootup.
2202  		 */
2203  		clear_local_APIC();
2204  		/*
2205  		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
2206  		 * local APIC to INT and NMI lines.
2207  		 */
2208  		apic_pr_verbose("Leaving PIC mode, enabling APIC mode.\n");
2209  		imcr_pic_to_apic();
2210  	}
2211  #endif
2212  }
2213  
/**
 * disconnect_bsp_APIC - detach the APIC from the interrupt system
 * @virt_wire_setup:	indicates, whether virtual wire mode is selected
 *
 * Virtual wire mode is necessary to deliver legacy interrupts even when the
 * APIC is disabled.
 *
 * On CONFIG_X86_32 with pic_mode set, the board is switched back to PIC mode
 * through the IMCR and the function returns without touching any APIC
 * register. Otherwise SPIV, LVT0 and LVT1 are reprogrammed, in that order.
 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
	unsigned int value;

#ifdef CONFIG_X86_32
	if (pic_mode) {
		/*
		 * Put the board back into PIC mode (has an effect only on
		 * certain older boards).  Note that APIC interrupts, including
		 * IPIs, won't work beyond this point!  The only exception are
		 * INIT IPIs.
		 */
		apic_pr_verbose("Disabling APIC mode, entering PIC mode.\n");
		imcr_apic_to_pic();
		return;
	}
#endif

	/* Go back to Virtual Wire compatibility mode */

	/*
	 * For the spurious interrupt use vector F, and enable it: the vector
	 * field is cleared and replaced with 0xf, and the APIC enable bit is
	 * set.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	value |= APIC_SPIV_APIC_ENABLED;
	value |= 0xf;
	apic_write(APIC_SPIV, value);

	if (!virt_wire_setup) {
		/*
		 * For LVT0 make it edge triggered, active high,
		 * external and enabled
		 *
		 * NOTE(review): remote-IRR and send-pending are cleared and
		 * then set again before the write; presumably they are
		 * read-only status bits in hardware - confirm against the SDM.
		 */
		value = apic_read(APIC_LVT0);
		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
		apic_write(APIC_LVT0, value);
	} else {
		/* Disable LVT0 */
		apic_write(APIC_LVT0, APIC_LVT_MASKED);
	}

	/*
	 * For LVT1 make it edge triggered, active high,
	 * nmi and enabled
	 *
	 * Same bit handling as the LVT0 case above, but with NMI delivery
	 * mode; this is done regardless of @virt_wire_setup.
	 */
	value = apic_read(APIC_LVT1);
	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
	apic_write(APIC_LVT1, value);
}
2277  
/*
 * Build an MSI message for the vector and destination APIC ID in @cfg.
 *
 * @cfg:  supplies the vector and destination APIC ID
 * @msg:  message to fill in; it is zeroed first
 * @dmar: true when the message is for the IOMMU itself, which may carry
 *	  destination ID bits 8-31 in the high address word
 */
void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
			   bool dmar)
{
	memset(msg, 0, sizeof(*msg));

	/* Data word: fixed delivery of the configured vector */
	msg->arch_data.vector = cfg->vector;
	msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED;

	/* Address words */
	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
	msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
	msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF;

	/*
	 * Only the IOMMU itself can use the trick of putting destination
	 * APIC ID into the high bits of the address. Anything else would
	 * just be writing to memory if it tried that, and needs IR to
	 * address APICs which can't be addressed in the normal 32-bit
	 * address range at 0xFFExxxxx. That is typically just 8 bits, but
	 * some hypervisors allow the extended destination ID field in bits
	 * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
	 */
	if (dmar) {
		msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
		return;
	}

	if (virt_ext_dest_id && cfg->dest_apicid < 0x8000) {
		msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
		return;
	}

	/* No room for the upper bits: complain once if any are set */
	WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
}
2307  
x86_msi_msg_get_destid(struct msi_msg * msg,bool extid)2308  u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
2309  {
2310  	u32 dest = msg->arch_addr_lo.destid_0_7;
2311  
2312  	if (extid)
2313  		dest |= msg->arch_addr_hi.destid_8_31 << 8;
2314  	return dest;
2315  }
2316  EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
2317  
/*
 * UP-mode part of the boot CPU APIC setup: resets the physical CPU present
 * map, passing the boot CPU's physical APIC ID.
 */
static void __init apic_bsp_up_setup(void)
{
	reset_phys_cpu_present_map(boot_cpu_physical_apicid);
}
2322  
/**
 * apic_bsp_setup - Setup function for local apic and io-apic
 * @upmode:		Force UP mode (for APIC_init_uniprocessor)
 *
 * Runs the boot CPU's local APIC and IO-APIC bring-up steps in a fixed
 * sequence; the UP-specific step is inserted only when @upmode is true.
 */
static void __init apic_bsp_setup(bool upmode)
{
	connect_bsp_APIC();
	/* UP-only step happens before the local APIC itself is set up */
	if (upmode)
		apic_bsp_up_setup();
	setup_local_APIC();

	/* IO-APIC enable and setup bracket the end of local APIC setup */
	enable_IO_APIC();
	end_local_APIC_setup();
	irq_remap_enable_fault_handling();
	setup_IO_APIC();
	lapic_update_legacy_vectors();
}
2340  
2341  #ifdef CONFIG_UP_LATE_INIT
up_late_init(void)2342  void __init up_late_init(void)
2343  {
2344  	if (apic_intr_mode == APIC_PIC)
2345  		return;
2346  
2347  	/* Setup local timer */
2348  	x86_init.timers.setup_percpu_clockev();
2349  }
2350  #endif
2351  
2352  /*
2353   * Power management
2354   */
2355  #ifdef CONFIG_PM
2356  
/*
 * Local APIC state kept across suspend/resume. lapic_suspend() fills in the
 * register fields and lapic_resume() writes them back; both return early
 * unless 'active' is set.
 */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;
	/* r/w apic fields */
	u32 apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	/* saved/restored only when lapic_get_maxlvt() >= 4 */
	unsigned int apic_lvtpc;
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	/* CONFIG_X86_THERMAL_VECTOR only, when lapic_get_maxlvt() >= 5 */
	unsigned int apic_thmr;
	/* CONFIG_X86_MCE_INTEL only, when lapic_get_maxlvt() >= 6 */
	unsigned int apic_cmci;
} apic_pm_state;
2380  
/*
 * Syscore suspend callback: snapshot the local APIC registers into
 * apic_pm_state, then mask the IO-APIC entries, disable the local APIC and
 * disable interrupt remapping with local interrupts off.
 *
 * Does nothing when apic_pm_state.active is clear. Always returns 0.
 */
static int lapic_suspend(void)
{
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return 0;

	/* Decides which of the optional LVT registers are saved below */
	maxlvt = lapic_get_maxlvt();

	apic_pm_state.apic_id = apic_read(APIC_ID);
	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
	if (maxlvt >= 4)
		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
#endif

	/* The teardown below runs with local interrupts disabled */
	local_irq_save(flags);

	/*
	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
	 * entries on some implementations.
	 */
	mask_ioapic_entries();

	disable_local_APIC();

	irq_remapping_disable();

	local_irq_restore(flags);
	return 0;
}
2428  
/*
 * Syscore resume callback: re-enable the local APIC (x2APIC mode or via the
 * APICBASE MSR), write back the registers saved by lapic_suspend() and
 * re-enable interrupt remapping, all with local interrupts disabled.
 *
 * Does nothing when apic_pm_state.active is clear.
 */
static void lapic_resume(void)
{
	unsigned int l, h;
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return;

	local_irq_save(flags);

	/*
	 * IO-APIC and PIC have their own resume routines.
	 * We just mask them here to make sure the interrupt
	 * subsystem is completely quiet while we enable x2apic
	 * and interrupt-remapping.
	 */
	mask_ioapic_entries();
	legacy_pic->mask_all();

	if (x2apic_mode) {
		__x2apic_enable();
	} else {
		/*
		 * Make sure the APICBASE points to the right address
		 *
		 * FIXME! This will be wrong if we ever support suspend on
		 * SMP! We'll need to do this as part of the CPU restore!
		 */
		if (boot_cpu_data.x86 >= 6) {
			rdmsr(MSR_IA32_APICBASE, l, h);
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
		}
	}

	maxlvt = lapic_get_maxlvt();
	/* Keep the error LVT masked while the other registers are restored */
	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
	apic_write(APIC_ID, apic_pm_state.apic_id);
	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
#endif
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
	/* ESR is written with 0 and read back before and after LVTERR is restored */
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);
	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);

	irq_remapping_reenable(x2apic_mode);

	local_irq_restore(flags);
}
2498  
2499  /*
2500   * This device has no shutdown method - fully functioning local APICs
2501   * are needed on every CPU up until machine_halt/restart/poweroff.
2502   */
2503  
2504  static struct syscore_ops lapic_syscore_ops = {
2505  	.resume		= lapic_resume,
2506  	.suspend	= lapic_suspend,
2507  };
2508  
/*
 * Mark the local APIC PM state as active so that lapic_suspend() and
 * lapic_resume() stop returning early.
 */
static void apic_pm_activate(void)
{
	apic_pm_state.active = 1;
}
2513  
init_lapic_sysfs(void)2514  static int __init init_lapic_sysfs(void)
2515  {
2516  	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2517  	if (boot_cpu_has(X86_FEATURE_APIC))
2518  		register_syscore_ops(&lapic_syscore_ops);
2519  
2520  	return 0;
2521  }
2522  
2523  /* local apic needs to resume before other devices access its registers. */
2524  core_initcall(init_lapic_sysfs);
2525  
2526  #else	/* CONFIG_PM */
2527  
/* Without CONFIG_PM there is no suspend state to activate: empty stub. */
static void apic_pm_activate(void) { }
2529  
2530  #endif	/* CONFIG_PM */
2531  
2532  #ifdef CONFIG_X86_64
2533  
/* Set once dmi_check_multi() has scanned the DMI table */
static int multi_checked;
/* Set by set_multi() when a multi_dmi_table entry matches */
static int multi;
2536  
set_multi(const struct dmi_system_id * d)2537  static int set_multi(const struct dmi_system_id *d)
2538  {
2539  	if (multi)
2540  		return 0;
2541  	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2542  	multi = 1;
2543  	return 0;
2544  }
2545  
/*
 * DMI identifiers of systems treated as multi-chassis; a match invokes
 * set_multi(). The empty entry terminates the table.
 */
static const struct dmi_system_id multi_dmi_table[] = {
	{
		.callback = set_multi,
		.ident = "IBM System Summit2",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
		},
	},
	{}
};
2557  
dmi_check_multi(void)2558  static void dmi_check_multi(void)
2559  {
2560  	if (multi_checked)
2561  		return;
2562  
2563  	dmi_check_system(multi_dmi_table);
2564  	multi_checked = 1;
2565  }
2566  
/*
 * apic_is_clustered_box() -- Check if we can expect good TSC
 *
 * Thus far, the major user of this is IBM's Summit2 series:
 * Clustered boxes may have unsynced TSC problems if they are
 * multi-chassis.
 * Use DMI to check them
 *
 * Returns the 'multi' flag: non-zero when a multi_dmi_table entry matched.
 * The DMI scan itself is only performed on the first call.
 */
int apic_is_clustered_box(void)
{
	dmi_check_multi();
	return multi;
}
2580  #endif
2581  
2582  /*
2583   * APIC command line parameters
2584   */
/*
 * "disableapic": mark the APIC as disabled and clear the X86_FEATURE_APIC
 * capability. @arg is ignored. Always returns 0.
 */
static int __init setup_disableapic(char *arg)
{
	apic_is_disabled = true;
	setup_clear_cpu_cap(X86_FEATURE_APIC);
	return 0;
}
early_param("disableapic", setup_disableapic);
2592  
/*
 * "nolapic": same as disableapic, for compatibility. Forwards @arg to
 * setup_disableapic() and returns its result.
 */
static int __init setup_nolapic(char *arg)
{
	return setup_disableapic(arg);
}
early_param("nolapic", setup_nolapic);
2599  
/*
 * "lapic_timer_c2_ok": set local_apic_timer_c2_ok. @arg is ignored.
 * Always returns 0.
 */
static int __init parse_lapic_timer_c2_ok(char *arg)
{
	local_apic_timer_c2_ok = 1;
	return 0;
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2606  
/*
 * "noapictimer": set disable_apic_timer. @arg is ignored.
 * Always returns 0.
 */
static int __init parse_disable_apic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("noapictimer", parse_disable_apic_timer);
2613  
parse_nolapic_timer(char * arg)2614  static int __init parse_nolapic_timer(char *arg)
2615  {
2616  	disable_apic_timer = 1;
2617  	return 0;
2618  }
2619  early_param("nolapic_timer", parse_nolapic_timer);
2620  
apic_set_verbosity(char * arg)2621  static int __init apic_set_verbosity(char *arg)
2622  {
2623  	if (!arg)  {
2624  		if (IS_ENABLED(CONFIG_X86_32))
2625  			return -EINVAL;
2626  
2627  		ioapic_is_disabled = false;
2628  		return 0;
2629  	}
2630  
2631  	if (strcmp("debug", arg) == 0)
2632  		apic_verbosity = APIC_DEBUG;
2633  	else if (strcmp("verbose", arg) == 0)
2634  		apic_verbosity = APIC_VERBOSE;
2635  #ifdef CONFIG_X86_64
2636  	else {
2637  		pr_warn("APIC Verbosity level %s not recognised"
2638  			" use apic=verbose or apic=debug\n", arg);
2639  		return -EINVAL;
2640  	}
2641  #endif
2642  
2643  	return 0;
2644  }
2645  early_param("apic", apic_set_verbosity);
2646  
lapic_insert_resource(void)2647  static int __init lapic_insert_resource(void)
2648  {
2649  	if (!apic_mmio_base)
2650  		return -1;
2651  
2652  	/* Put local APIC into the resource map. */
2653  	lapic_resource.start = apic_mmio_base;
2654  	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2655  	insert_resource(&iomem_resource, &lapic_resource);
2656  
2657  	return 0;
2658  }
2659  
2660  /*
2661   * need call insert after e820__reserve_resources()
2662   * that is using request_resource
2663   */
2664  late_initcall(lapic_insert_resource);
2665  
apic_set_extnmi(char * arg)2666  static int __init apic_set_extnmi(char *arg)
2667  {
2668  	if (!arg)
2669  		return -EINVAL;
2670  
2671  	if (!strncmp("all", arg, 3))
2672  		apic_extnmi = APIC_EXTNMI_ALL;
2673  	else if (!strncmp("none", arg, 4))
2674  		apic_extnmi = APIC_EXTNMI_NONE;
2675  	else if (!strncmp("bsp", arg, 3))
2676  		apic_extnmi = APIC_EXTNMI_BSP;
2677  	else {
2678  		pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
2679  		return -EINVAL;
2680  	}
2681  
2682  	return 0;
2683  }
2684  early_param("apic_extnmi", apic_set_extnmi);
2685