xref: /openbmc/linux/Documentation/arch/ia64/err_inject.rst (revision f4356947f0297b0962fdd197672db7edf9f58be6)
1========================================
2IPF Machine Check (MC) error inject tool
3========================================
4
5IPF Machine Check (MC) error inject tool is used to inject MC
6errors from Linux. The tool is a test bed for IPF MC work flow including
7hardware correctable error handling, OS recoverable error handling, MC
8event logging, etc.
9
10The tool includes two parts: a kernel driver and a user application
11sample. The driver provides an interface to PAL to inject errors
12and query error injection capabilities. The driver code is in
13arch/ia64/kernel/err_inject.c. The application sample (shown below)
14provides a combination of various errors and calls the driver's interface
15(sysfs interface) to inject errors or query error injection capabilities.
16
17The tool can be used to test Intel IPF machine MC handling capabilities.
18It's especially useful for people who cannot access a hardware MC injection
19tool to inject errors. It's also very useful to integrate with other
20software test suites to do stress testing on IPF.
21
22Below is a sample application as part of the whole tool. The sample
23can be used as a working test tool. Or it can be expanded to include
24more features. It can also be integrated into a library or other user
25application for more thorough testing.
26
27The sample application takes err.conf as error configuration input. GCC
28compiles the code. After you install err_inject driver, you can run
29this sample application to inject errors.
30
31Errata: Itanium 2 Processors Specification Update lists some errata against
32the pal_mc_error_inject PAL procedure. The following err.conf has been tested
33on latest Montecito PAL.
34
35err.conf::
36
37  #This is configuration file for err_inject_tool.
38  #The format of each line is:
39  #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
40  #where
41  #	cpu: logical cpu number the error will be injected on.
42  #	loop: number of times the error will be injected.
43  #	interval: in seconds; how often one error is injected.
44  #	err_type_info, err_struct_info: PAL parameters.
45  #
46  #Note: All values are hex w/o or w/ 0x prefix.
47
48
49  #On cpu2, inject only total 0x10 errors, interval 5 seconds
50  #corrected, data cache, hier-2, physical addr(assigned by tool code).
51  #working on Montecito latest PAL.
52  2, 10, 5, 4101, 95
53
54  #On cpu4, inject and consume total 0x10 errors, interval 5 seconds
55  #corrected, data cache, hier-2, physical addr(assigned by tool code).
56  #working on Montecito latest PAL.
57  4, 10, 5, 4109, 95
58
59  #On cpu15, inject and consume total 0x10 errors, interval 5 seconds
60  #recoverable, DTR0, hier-2.
61  #working on Montecito latest PAL.
62  0xf, 0x10, 5, 4249, 15
63
64The sample application source code:
65
66err_injection_tool.c::
67
68  /*
69   * This program is free software; you can redistribute it and/or modify
70   * it under the terms of the GNU General Public License as published by
71   * the Free Software Foundation; either version 2 of the License, or
72   * (at your option) any later version.
73   *
74   * This program is distributed in the hope that it will be useful, but
75   * WITHOUT ANY WARRANTY; without even the implied warranty of
76   * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
77   * NON INFRINGEMENT.  See the GNU General Public License for more
78   * details.
79   *
80   * You should have received a copy of the GNU General Public License
81   * along with this program; if not, write to the Free Software
82   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
83   *
84   * Copyright (C) 2006 Intel Co
85   *	Fenghua Yu <fenghua.yu@intel.com>
86   *
87   */
88  #include <sys/types.h>
89  #include <sys/stat.h>
90  #include <fcntl.h>
91  #include <stdio.h>
92  #include <sched.h>
93  #include <unistd.h>
94  #include <stdlib.h>
95  #include <stdarg.h>
96  #include <string.h>
97  #include <errno.h>
98  #include <time.h>
99  #include <sys/ipc.h>
100  #include <sys/sem.h>
101  #include <sys/wait.h>
102  #include <sys/mman.h>
103  #include <sys/shm.h>
104
105  #define MAX_FN_SIZE 		256
106  #define MAX_BUF_SIZE 		256
107  #define DATA_BUF_SIZE 		256
108  #define NR_CPUS 		512
109  #define MAX_TASK_NUM		2048
110  #define MIN_INTERVAL		5	// seconds
111  #define	ERR_DATA_BUFFER_SIZE 	3	// Three 8-byte.
112  #define PARA_FIELD_NUM		5
113  #define MASK_SIZE		(NR_CPUS/64)
114  #define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
115
116  int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
117
118  int verbose;
119  #define vbprintf if (verbose) printf
120
121  int log_info(int cpu, const char *fmt, ...)
122  {
123	FILE *log;
124	char fn[MAX_FN_SIZE];
125	char buf[MAX_BUF_SIZE];
126	va_list args;
127
128	sprintf(fn, "%d.log", cpu);
129	log=fopen(fn, "a+");
130	if (log==NULL) {
131		perror("Error open:");
132		return -1;
133	}
134
135	va_start(args, fmt);
136	vprintf(fmt, args);
137	memset(buf, 0, MAX_BUF_SIZE);
138	vsprintf(buf, fmt, args);
139	va_end(args);
140
141	fwrite(buf, sizeof(buf), 1, log);
142	fclose(log);
143
144	return 0;
145  }
146
147  typedef unsigned long u64;
148  typedef unsigned int  u32;
149
150  typedef union err_type_info_u {
151	struct {
152		u64	mode		: 3,	/* 0-2 */
153			err_inj		: 3,	/* 3-5 */
154			err_sev		: 2,	/* 6-7 */
155			err_struct	: 5,	/* 8-12 */
156			struct_hier	: 3,	/* 13-15 */
157			reserved	: 48;	/* 16-63 */
158	} err_type_info_u;
159	u64	err_type_info;
160  } err_type_info_t;
161
162  typedef union err_struct_info_u {
163	struct {
164		u64	siv		: 1,	/* 0	 */
165			c_t		: 2,	/* 1-2	 */
166			cl_p		: 3,	/* 3-5	 */
167			cl_id		: 3,	/* 6-8	 */
168			cl_dp		: 1,	/* 9	 */
169			reserved1	: 22,	/* 10-31 */
170			tiv		: 1,	/* 32	 */
171			trigger		: 4,	/* 33-36 */
172			trigger_pl 	: 3,	/* 37-39 */
173			reserved2 	: 24;	/* 40-63 */
174	} err_struct_info_cache;
175	struct {
176		u64	siv		: 1,	/* 0	 */
177			tt		: 2,	/* 1-2	 */
178			tc_tr		: 2,	/* 3-4	 */
179			tr_slot		: 8,	/* 5-12	 */
180			reserved1	: 19,	/* 13-31 */
181			tiv		: 1,	/* 32	 */
182			trigger		: 4,	/* 33-36 */
183			trigger_pl 	: 3,	/* 37-39 */
184			reserved2 	: 24;	/* 40-63 */
185	} err_struct_info_tlb;
186	struct {
187		u64	siv		: 1,	/* 0	 */
188			regfile_id	: 4,	/* 1-4	 */
189			reg_num		: 7,	/* 5-11	 */
190			reserved1	: 20,	/* 12-31 */
191			tiv		: 1,	/* 32	 */
192			trigger		: 4,	/* 33-36 */
193			trigger_pl 	: 3,	/* 37-39 */
194			reserved2 	: 24;	/* 40-63 */
195	} err_struct_info_register;
196	struct {
197		u64	reserved;
198	} err_struct_info_bus_processor_interconnect;
199	u64	err_struct_info;
200  } err_struct_info_t;
201
202  typedef union err_data_buffer_u {
203	struct {
204		u64	trigger_addr;		/* 0-63		*/
205		u64	inj_addr;		/* 64-127 	*/
206		u64	way		: 5,	/* 128-132	*/
207			index		: 20,	/* 133-152	*/
208					: 39;	/* 153-191	*/
209	} err_data_buffer_cache;
210	struct {
211		u64	trigger_addr;		/* 0-63		*/
212		u64	inj_addr;		/* 64-127 	*/
213		u64	way		: 5,	/* 128-132	*/
214			index		: 20,	/* 133-152	*/
215			reserved	: 39;	/* 153-191	*/
216	} err_data_buffer_tlb;
217	struct {
218		u64	trigger_addr;		/* 0-63		*/
219	} err_data_buffer_register;
220	struct {
221		u64	reserved;		/* 0-63		*/
222	} err_data_buffer_bus_processor_interconnect;
223	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
224  } err_data_buffer_t;
225
226  typedef union capabilities_u {
227	struct {
228		u64	i		: 1,
229			d		: 1,
230			rv		: 1,
231			tag		: 1,
232			data		: 1,
233			mesi		: 1,
234			dp		: 1,
235			reserved1	: 3,
236			pa		: 1,
237			va		: 1,
238			wi		: 1,
239			reserved2	: 20,
240			trigger		: 1,
241			trigger_pl	: 1,
242			reserved3	: 30;
243	} capabilities_cache;
244	struct {
245		u64	d		: 1,
246			i		: 1,
247			rv		: 1,
248			tc		: 1,
249			tr		: 1,
250			reserved1	: 27,
251			trigger		: 1,
252			trigger_pl	: 1,
253			reserved2	: 30;
254	} capabilities_tlb;
255	struct {
256		u64	gr_b0		: 1,
257			gr_b1		: 1,
258			fr		: 1,
259			br		: 1,
260			pr		: 1,
261			ar		: 1,
262			cr		: 1,
263			rr		: 1,
264			pkr		: 1,
265			dbr		: 1,
266			ibr		: 1,
267			pmc		: 1,
268			pmd		: 1,
269			reserved1	: 3,
270			regnum		: 1,
271			reserved2	: 15,
272			trigger		: 1,
273			trigger_pl	: 1,
274			reserved3	: 30;
275	} capabilities_register;
276	struct {
277		u64	reserved;
278	} capabilities_bus_processor_interconnect;
279  } capabilities_t;
280
281  typedef struct resources_s {
282	u64	ibr0		: 1,
283		ibr2		: 1,
284		ibr4		: 1,
285		ibr6		: 1,
286		dbr0		: 1,
287		dbr2		: 1,
288		dbr4		: 1,
289		dbr6		: 1,
290		reserved	: 48;
291  } resources_t;
292
293
294  long get_page_size(void)
295  {
296	long page_size=sysconf(_SC_PAGESIZE);
297	return page_size;
298  }
299
300  #define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
301  #define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
302  #define SHM_VA 0x2000000100000000
303
304  int shmid;
305  void *shmaddr;
306
307  int create_shm(void)
308  {
309	key_t key;
310	char fn[MAX_FN_SIZE];
311
312	/* cpu0 is always existing */
313	sprintf(fn, PATH_FORMAT, 0);
314	if ((key = ftok(fn, 's')) == -1) {
315		perror("ftok");
316		return -1;
317	}
318
319	shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
320	if (shmid == -1) {
321		if (errno==EEXIST) {
322			shmid = shmget(key, SHM_SIZE, 0);
323			if (shmid == -1) {
324				perror("shmget");
325				return -1;
326			}
327		}
328		else {
329			perror("shmget");
330			return -1;
331		}
332	}
333	vbprintf("shmid=%d", shmid);
334
335	/* connect to the segment: */
336	shmaddr = shmat(shmid, (void *)SHM_VA, 0);
337	if (shmaddr == (void*)-1) {
338		perror("shmat");
339		return -1;
340	}
341
342	memset(shmaddr, 0, SHM_SIZE);
343	mlock(shmaddr, SHM_SIZE);
344
345	return 0;
346  }
347
348  int free_shm()
349  {
350	munlock(shmaddr, SHM_SIZE);
351          shmdt(shmaddr);
352	semctl(shmid, 0, IPC_RMID);
353
354	return 0;
355  }
356
357  #ifdef _SEM_SEMUN_UNDEFINED
358  union semun
359  {
360	int val;
361	struct semid_ds *buf;
362	unsigned short int *array;
363	struct seminfo *__buf;
364  };
365  #endif
366
367  u32 mode=1; /* 1: physical mode; 2: virtual mode. */
368  int one_lock=1;
369  key_t key[NR_CPUS];
370  int semid[NR_CPUS];
371
372  int create_sem(int cpu)
373  {
374	union semun arg;
375	char fn[MAX_FN_SIZE];
376	int sid;
377
378	sprintf(fn, PATH_FORMAT, cpu);
379	sprintf(fn, "%s/%s", fn, "err_type_info");
380	if ((key[cpu] = ftok(fn, 'e')) == -1) {
381		perror("ftok");
382		return -1;
383	}
384
385	if (semid[cpu]!=0)
386		return 0;
387
388	/* clear old semaphore */
389	if ((sid = semget(key[cpu], 1, 0)) != -1)
390		semctl(sid, 0, IPC_RMID);
391
392	/* get one semaphore */
393	if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
394		perror("semget");
395		printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
396			(u64)key[cpu]);
397		return -1;
398	}
399
400	vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
401		(u64)key[cpu]);
402	/* initialize the semaphore to 1: */
403	arg.val = 1;
404	if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
405		perror("semctl");
406		return -1;
407	}
408
409	return 0;
410  }
411
412  static int lock(int cpu)
413  {
414	struct sembuf lock;
415
416	lock.sem_num = cpu;
417	lock.sem_op = 1;
418	semop(semid[cpu], &lock, 1);
419
420          return 0;
421  }
422
423  static int unlock(int cpu)
424  {
425	struct sembuf unlock;
426
427	unlock.sem_num = cpu;
428	unlock.sem_op = -1;
429	semop(semid[cpu], &unlock, 1);
430
431          return 0;
432  }
433
434  void free_sem(int cpu)
435  {
436	semctl(semid[cpu], 0, IPC_RMID);
437  }
438
439  int wr_multi(char *fn, unsigned long *data, int size)
440  {
441	int fd;
442	char buf[MAX_BUF_SIZE];
443	int ret;
444
445	if (size==1)
446		sprintf(buf, "%lx", *data);
447	else if (size==3)
448		sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
449	else {
450		fprintf(stderr,"write to file with wrong size!\n");
451		return -1;
452	}
453
454	fd=open(fn, O_RDWR);
455	if (!fd) {
456		perror("Error:");
457		return -1;
458	}
459	ret=write(fd, buf, sizeof(buf));
460	close(fd);
461	return ret;
462  }
463
464  int wr(char *fn, unsigned long data)
465  {
466	return wr_multi(fn, &data, 1);
467  }
468
469  int rd(char *fn, unsigned long *data)
470  {
471	int fd;
472	char buf[MAX_BUF_SIZE];
473
474	fd=open(fn, O_RDONLY);
475	if (fd<0) {
476		perror("Error:");
477		return -1;
478	}
479	read(fd, buf, MAX_BUF_SIZE);
480	*data=strtoul(buf, NULL, 16);
481	close(fd);
482	return 0;
483  }
484
485  int rd_status(char *path, int *status)
486  {
487	char fn[MAX_FN_SIZE];
488	sprintf(fn, "%s/status", path);
489	if (rd(fn, (u64*)status)<0) {
490		perror("status reading error.\n");
491		return -1;
492	}
493
494	return 0;
495  }
496
497  int rd_capabilities(char *path, u64 *capabilities)
498  {
499	char fn[MAX_FN_SIZE];
500	sprintf(fn, "%s/capabilities", path);
501	if (rd(fn, capabilities)<0) {
502		perror("capabilities reading error.\n");
503		return -1;
504	}
505
506	return 0;
507  }
508
509  int rd_all(char *path)
510  {
511	unsigned long err_type_info, err_struct_info, err_data_buffer;
512	int status;
513	unsigned long capabilities, resources;
514	char fn[MAX_FN_SIZE];
515
516	sprintf(fn, "%s/err_type_info", path);
517	if (rd(fn, &err_type_info)<0) {
518		perror("err_type_info reading error.\n");
519		return -1;
520	}
521	printf("err_type_info=%lx\n", err_type_info);
522
523	sprintf(fn, "%s/err_struct_info", path);
524	if (rd(fn, &err_struct_info)<0) {
525		perror("err_struct_info reading error.\n");
526		return -1;
527	}
528	printf("err_struct_info=%lx\n", err_struct_info);
529
530	sprintf(fn, "%s/err_data_buffer", path);
531	if (rd(fn, &err_data_buffer)<0) {
532		perror("err_data_buffer reading error.\n");
533		return -1;
534	}
535	printf("err_data_buffer=%lx\n", err_data_buffer);
536
537	sprintf(fn, "%s/status", path);
538	if (rd("status", (u64*)&status)<0) {
539		perror("status reading error.\n");
540		return -1;
541	}
542	printf("status=%d\n", status);
543
544	sprintf(fn, "%s/capabilities", path);
545	if (rd(fn,&capabilities)<0) {
546		perror("capabilities reading error.\n");
547		return -1;
548	}
549	printf("capabilities=%lx\n", capabilities);
550
551	sprintf(fn, "%s/resources", path);
552	if (rd(fn, &resources)<0) {
553		perror("resources reading error.\n");
554		return -1;
555	}
556	printf("resources=%lx\n", resources);
557
558	return 0;
559  }
560
561  int query_capabilities(char *path, err_type_info_t err_type_info,
562			u64 *capabilities)
563  {
564	char fn[MAX_FN_SIZE];
565	err_struct_info_t err_struct_info;
566	err_data_buffer_t err_data_buffer;
567
568	err_struct_info.err_struct_info=0;
569	memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
570
571	sprintf(fn, "%s/err_type_info", path);
572	wr(fn, err_type_info.err_type_info);
573	sprintf(fn, "%s/err_struct_info", path);
574	wr(fn, 0x0);
575	sprintf(fn, "%s/err_data_buffer", path);
576	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
577
578	// Fire pal_mc_error_inject procedure.
579	sprintf(fn, "%s/call_start", path);
580	wr(fn, mode);
581
582	if (rd_capabilities(path, capabilities)<0)
583		return -1;
584
585	return 0;
586  }
587
588  int query_all_capabilities()
589  {
590	int status;
591	err_type_info_t err_type_info;
592	int err_sev, err_struct, struct_hier;
593	int cap=0;
594	u64 capabilities;
595	char path[MAX_FN_SIZE];
596
597	err_type_info.err_type_info=0;			// Initial
598	err_type_info.err_type_info_u.mode=0;		// Query mode;
599	err_type_info.err_type_info_u.err_inj=0;
600
601	printf("All capabilities implemented in pal_mc_error_inject:\n");
602	sprintf(path, PATH_FORMAT ,0);
603	for (err_sev=0;err_sev<3;err_sev++)
604		for (err_struct=0;err_struct<5;err_struct++)
605			for (struct_hier=0;struct_hier<5;struct_hier++)
606	{
607		status=-1;
608		capabilities=0;
609		err_type_info.err_type_info_u.err_sev=err_sev;
610		err_type_info.err_type_info_u.err_struct=err_struct;
611		err_type_info.err_type_info_u.struct_hier=struct_hier;
612
613		if (query_capabilities(path, err_type_info, &capabilities)<0)
614			continue;
615
616		if (rd_status(path, &status)<0)
617			continue;
618
619		if (status==0) {
620			cap=1;
621			printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
622				err_sev, err_struct, struct_hier);
623			printf("capabilities 0x%lx\n", capabilities);
624		}
625	}
626	if (!cap) {
627		printf("No capabilities supported.\n");
628		return 0;
629	}
630
631	return 0;
632  }
633
634  int err_inject(int cpu, char *path, err_type_info_t err_type_info,
635		err_struct_info_t err_struct_info,
636		err_data_buffer_t err_data_buffer)
637  {
638	int status;
639	char fn[MAX_FN_SIZE];
640
641	log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
642		err_type_info.err_type_info,
643		err_struct_info.err_struct_info);
644	log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
645		err_data_buffer.err_data_buffer[0],
646		err_data_buffer.err_data_buffer[1],
647		err_data_buffer.err_data_buffer[2]);
648	sprintf(fn, "%s/err_type_info", path);
649	wr(fn, err_type_info.err_type_info);
650	sprintf(fn, "%s/err_struct_info", path);
651	wr(fn, err_struct_info.err_struct_info);
652	sprintf(fn, "%s/err_data_buffer", path);
653	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
654
655	// Fire pal_mc_error_inject procedure.
656	sprintf(fn, "%s/call_start", path);
657	wr(fn,mode);
658
659	if (rd_status(path, &status)<0) {
660		vbprintf("fail: read status\n");
661		return -100;
662	}
663
664	if (status!=0) {
665		log_info(cpu, "fail: status=%d\n", status);
666		return status;
667	}
668
669	return status;
670  }
671
672  static int construct_data_buf(char *path, err_type_info_t err_type_info,
673		err_struct_info_t err_struct_info,
674		err_data_buffer_t *err_data_buffer,
675		void *va1)
676  {
677	char fn[MAX_FN_SIZE];
678	u64 virt_addr=0, phys_addr=0;
679
680	vbprintf("va1=%lx\n", (u64)va1);
681	memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
682
683	switch (err_type_info.err_type_info_u.err_struct) {
684		case 1: // Cache
685			switch (err_struct_info.err_struct_info_cache.cl_id) {
686				case 1: //Virtual addr
687					err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
688					break;
689				case 2: //Phys addr
690					sprintf(fn, "%s/virtual_to_phys", path);
691					virt_addr=(u64)va1;
692					if (wr(fn,virt_addr)<0)
693						return -1;
694					rd(fn, &phys_addr);
695					err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
696					break;
697				default:
698					printf("Not supported cl_id\n");
699					break;
700			}
701			break;
702		case 2: //  TLB
703			break;
704		case 3: //  Register file
705			break;
706		case 4: //  Bus/system interconnect
707		default:
708			printf("Not supported err_struct\n");
709			break;
710	}
711
712	return 0;
713  }
714
715  typedef struct {
716	u64 cpu;
717	u64 loop;
718	u64 interval;
719	u64 err_type_info;
720	u64 err_struct_info;
721	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
722  } parameters_t;
723
724  parameters_t line_para;
725  int para;
726
727  static int empty_data_buffer(u64 *err_data_buffer)
728  {
729	int empty=1;
730	int i;
731
732	for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
733	   if (err_data_buffer[i]!=-1)
734		empty=0;
735
736	return empty;
737  }
738
739  int err_inj()
740  {
741	err_type_info_t err_type_info;
742	err_struct_info_t err_struct_info;
743	err_data_buffer_t err_data_buffer;
744	int count;
745	FILE *fp;
746	unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
747	u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
748	int num;
749	int i;
750	char path[MAX_FN_SIZE];
751	parameters_t parameters[MAX_TASK_NUM]={};
752	pid_t child_pid[MAX_TASK_NUM];
753	time_t current_time;
754	int status;
755
756	if (!para) {
757	    fp=fopen("err.conf", "r");
758	    if (fp==NULL) {
759		perror("Error open err.conf");
760		return -1;
761	    }
762
763	    num=0;
764	    while (!feof(fp)) {
765		char buf[256];
766		memset(buf,0,256);
767		fgets(buf, 256, fp);
768		count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
769				&cpu, &loop, &interval,&err_type_info_conf,
770				&err_struct_info_conf,
771				&err_data_buffer_conf[0],
772				&err_data_buffer_conf[1],
773				&err_data_buffer_conf[2]);
774		if (count!=PARA_FIELD_NUM+3) {
775			err_data_buffer_conf[0]=-1;
776			err_data_buffer_conf[1]=-1;
777			err_data_buffer_conf[2]=-1;
778			count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
779				&cpu, &loop, &interval,&err_type_info_conf,
780				&err_struct_info_conf);
781			if (count!=PARA_FIELD_NUM)
782				continue;
783		}
784
785		parameters[num].cpu=cpu;
786		parameters[num].loop=loop;
787		parameters[num].interval= interval>MIN_INTERVAL
788					  ?interval:MIN_INTERVAL;
789		parameters[num].err_type_info=err_type_info_conf;
790		parameters[num].err_struct_info=err_struct_info_conf;
791		memcpy(parameters[num++].err_data_buffer,
792			err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
793
794		if (num>=MAX_TASK_NUM)
795			break;
796	    }
797	}
798	else {
799		parameters[0].cpu=line_para.cpu;
800		parameters[0].loop=line_para.loop;
801		parameters[0].interval= line_para.interval>MIN_INTERVAL
802					  ?line_para.interval:MIN_INTERVAL;
803		parameters[0].err_type_info=line_para.err_type_info;
804		parameters[0].err_struct_info=line_para.err_struct_info;
805		memcpy(parameters[0].err_data_buffer,
806			line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
807
808		num=1;
809	}
810
811	/* Create semaphore: If one_lock, one semaphore for all processors.
812	   Otherwise, one semaphore for each processor. */
813	if (one_lock) {
814		if (create_sem(0)) {
815			printf("Can not create semaphore...exit\n");
816			free_sem(0);
817			return -1;
818		}
819	}
820	else {
821		for (i=0;i<num;i++) {
822		   if (create_sem(parameters[i].cpu)) {
823			printf("Can not create semaphore for cpu%d...exit\n",i);
824			free_sem(parameters[num].cpu);
825			return -1;
826		   }
827		}
828	}
829
830	/* Create a shm segment which will be used to inject/consume errors on.*/
831	if (create_shm()==-1) {
832		printf("Error to create shm...exit\n");
833		return -1;
834	}
835
836	for (i=0;i<num;i++) {
837		pid_t pid;
838
839		current_time=time(NULL);
840		log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
841		log_info(parameters[i].cpu, "Configurations:\n");
842		log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
843			parameters[i].cpu,
844			parameters[i].loop,
845			parameters[i].interval);
846		log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
847			parameters[i].err_type_info,
848			parameters[i].err_struct_info);
849
850		sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
851		err_type_info.err_type_info=parameters[i].err_type_info;
852		err_struct_info.err_struct_info=parameters[i].err_struct_info;
853		memcpy(err_data_buffer.err_data_buffer,
854			parameters[i].err_data_buffer,
855			ERR_DATA_BUFFER_SIZE*8);
856
857		pid=fork();
858		if (pid==0) {
859			unsigned long mask[MASK_SIZE];
860			int j, k;
861
862			void *va1, *va2;
863
864			/* Allocate two memory areas va1 and va2 in shm */
865			va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
866			va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
867
868			vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
869			memset(va1, 0x1, PAGE_SIZE);
870			memset(va2, 0x2, PAGE_SIZE);
871
872			if (empty_data_buffer(err_data_buffer.err_data_buffer))
873				/* If not specified yet, construct data buffer
874				 * with va1
875				 */
876				construct_data_buf(path, err_type_info,
877					err_struct_info, &err_data_buffer,va1);
878
879			for (j=0;j<MASK_SIZE;j++)
880				mask[j]=0;
881
882			cpu=parameters[i].cpu;
883			k = cpu%64;
884			j = cpu/64;
885			mask[j] = 1UL << k;
886
887			if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
888				perror("Error sched_setaffinity:");
889				return -1;
890			}
891
892			for (j=0; j<parameters[i].loop; j++) {
893				log_info(parameters[i].cpu,"Injection ");
894				log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
895
896					parameters[i].cpu,j+1, parameters[i].loop);
897
898				/* Hold the lock */
899				if (one_lock)
900					lock(0);
901				else
902				/* Hold lock on this cpu */
903					lock(parameters[i].cpu);
904
905				if ((status=err_inject(parameters[i].cpu,
906					   path, err_type_info,
907					   err_struct_info, err_data_buffer))
908					   ==0) {
909					/* consume the error for "inject only"*/
910					memcpy(va2, va1, PAGE_SIZE);
911					memcpy(va1, va2, PAGE_SIZE);
912					log_info(parameters[i].cpu,
913						"successful\n");
914				}
915				else {
916					log_info(parameters[i].cpu,"fail:");
917					log_info(parameters[i].cpu,
918						"status=%d\n", status);
919					unlock(parameters[i].cpu);
920					break;
921				}
922				if (one_lock)
923				/* Release the lock */
924					unlock(0);
925				/* Release lock on this cpu */
926				else
927					unlock(parameters[i].cpu);
928
929				if (j < parameters[i].loop-1)
930					sleep(parameters[i].interval);
931			}
932			current_time=time(NULL);
933			log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
934			return 0;
935		}
936		else if (pid<0) {
937			perror("Error fork:");
938			continue;
939		}
940		child_pid[i]=pid;
941	}
942	for (i=0;i<num;i++)
943		waitpid(child_pid[i], NULL, 0);
944
945	if (one_lock)
946		free_sem(0);
947	else
948		for (i=0;i<num;i++)
949			free_sem(parameters[i].cpu);
950
951	printf("All done.\n");
952
953	return 0;
954  }
955
956  void help()
957  {
958	printf("err_inject_tool:\n");
959	printf("\t-q: query all capabilities. default: off\n");
960	printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
961	printf("\t-i: inject errors. default: off\n");
962	printf("\t-l: one lock per cpu. default: one lock for all\n");
963	printf("\t-e: error parameters:\n");
964	printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n");
965	printf("\t\t   cpu: logical cpu number the error will be inject in.\n");
966	printf("\t\t   loop: times the error will be injected.\n");
967	printf("\t\t   interval: In second. every so often one error is injected.\n");
968	printf("\t\t   err_type_info, err_struct_info: PAL parameters.\n");
969	printf("\t\t   err_data_buffer: PAL parameter. Optional. If not present,\n");
970	printf("\t\t                    it's constructed by tool automatically. Be\n");
971	printf("\t\t                    careful to provide err_data_buffer and make\n");
972	printf("\t\t                    sure it's working with the environment.\n");
973	printf("\t    Note:no space between error parameters.\n");
974	printf("\t    default: Take error parameters from err.conf instead of command line.\n");
975	printf("\t-v: verbose. default: off\n");
976	printf("\t-h: help\n\n");
977	printf("The tool will take err.conf file as ");
978	printf("input to inject single or multiple errors ");
979	printf("on one or multiple cpus in parallel.\n");
980  }
981
982  int main(int argc, char **argv)
983  {
984	char c;
985	int do_err_inj=0;
986	int do_query_all=0;
987	int count;
988	u32 m;
989
990	/* Default one lock for all cpu's */
991	one_lock=1;
992	while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
993		switch (c) {
994			case 'm':	/* Procedure mode. 1: phys 2: virt */
995				count=sscanf(optarg, "%x", &m);
996				if (count!=1 || (m!=1 && m!=2)) {
997					printf("Wrong mode number.\n");
998					help();
999					return -1;
1000				}
1001				mode=m;
1002				break;
1003			case 'i':	/* Inject errors */
1004				do_err_inj=1;
1005				break;
1006			case 'q':	/* Query */
1007				do_query_all=1;
1008				break;
1009			case 'v':	/* Verbose */
1010				verbose=1;
1011				break;
1012			case 'l':	/* One lock per cpu */
1013				one_lock=0;
1014				break;
1015			case 'e':	/* error arguments */
1016				/* Take parameters:
1017				 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
1018				 * err_data_buffer is optional. Recommend not to specify
1019				 * err_data_buffer. Better to use tool to generate it.
1020				 */
1021				count=sscanf(optarg,
1022					"%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
1023					&line_para.cpu,
1024					&line_para.loop,
1025					&line_para.interval,
1026					&line_para.err_type_info,
1027					&line_para.err_struct_info,
1028					&line_para.err_data_buffer[0],
1029					&line_para.err_data_buffer[1],
1030					&line_para.err_data_buffer[2]);
1031				if (count!=PARA_FIELD_NUM+3) {
1032				    line_para.err_data_buffer[0]=-1,
1033				    line_para.err_data_buffer[1]=-1,
1034				    line_para.err_data_buffer[2]=-1;
1035				    count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
1036						&line_para.cpu,
1037						&line_para.loop,
1038						&line_para.interval,
1039						&line_para.err_type_info,
1040						&line_para.err_struct_info);
1041				    if (count!=PARA_FIELD_NUM) {
1042					printf("Wrong error arguments.\n");
1043					help();
1044					return -1;
1045				    }
1046				}
1047				para=1;
1048				break;
1049			continue;
1050				break;
1051			case 'h':
1052				help();
1053				return 0;
1054			default:
1055				break;
1056		}
1057
1058	if (do_query_all)
1059		query_all_capabilities();
1060	if (do_err_inj)
1061		err_inj();
1062
1063	if (!do_query_all &&  !do_err_inj)
1064		help();
1065
1066	return 0;
1067  }
1068