1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
|
/*
* Hypervisor-assisted dump
*
* Linas Vepstas, Manish Ahuja 2008
* Copyright 2008 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <linux/swap.h>
#include <linux/sysfs.h>
#include <asm/page.h>
#include <asm/phyp_dump.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/rtas.h>
/* Variables, used to communicate data between early boot and late boot */
/* Backing storage for the state; reached through phyp_dump_info below. */
static struct phyp_dump phyp_dump_vars;
/* Non-static handle to the state above, so other files can reach it. */
struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
/*
 * RTAS token for "ibm,configure-kernel-dump"; looked up in
 * phyp_dump_setup() and passed to rtas_call() when registering or
 * invalidating a dump.
 */
static int ibm_configure_kernel_dump;
/* ------------------------------------------------- */
/* RTAS interfaces to declare the dump regions */
/*
 * Describes one region to be captured in a dump.
 *
 * dump_flags:          set to DUMP_REQUEST_FLAG by init_dump_header()
 * source_type:         one of the DUMP_SOURCE_* values
 * error_flags:         only read (printed) by this file
 * source_address:      start of the region to capture
 * source_length:       size of the region to capture
 * length_copied:       only read by this file (printed / shown in sysfs);
 *                      presumably filled in by firmware - TODO confirm
 * destination_address: where the captured data is to be placed
 *
 * NOTE(review): the layout is handed to firmware via rtas_call(), so the
 * field order and widths are presumably fixed by the platform - confirm
 * before changing.
 */
struct dump_section {
u32 dump_flags;
u16 source_type;
u16 error_flags;
u64 source_address;
u64 source_length;
u64 length_copied;
u64 destination_address;
};
/*
 * Header handed to the "ibm,configure-kernel-dump" RTAS call, and also the
 * type used to interpret the "ibm,kernel-dump" property in
 * phyp_dump_setup().
 *
 * init_dump_header() fills in version, num_of_sections, status and
 * first_offset_section (the offset of cpu_data within this struct), and
 * zeroes all of the dump-disk related fields (dump_disk_section,
 * block_num_dd, num_of_blocks_dd, offset_dd) as well as maxtime_to_auto.
 * The three trailing sections describe CPU state, the HPTE region and the
 * low kernel memory region respectively.
 */
struct phyp_dump_header {
u32 version;
u16 num_of_sections;
u16 status;
u32 first_offset_section;
u32 dump_disk_section;
u64 block_num_dd;
u64 num_of_blocks_dd;
u32 offset_dd;
u32 maxtime_to_auto;
/* No dump disk path string used */
struct dump_section cpu_data;
struct dump_section hpte_data;
struct dump_section kernel_data;
};
/* The dump header *must be* in low memory, so .bss it */
/* This is the header passed to rtas_call() on register and invalidate. */
static struct phyp_dump_header phdr;
/* Number of sections described in the header: cpu, hpte and kernel data */
#define NUM_DUMP_SECTIONS 3
/* Value stored in phyp_dump_header.version */
#define DUMP_HEADER_VERSION 0x1
/* Value stored in dump_section.dump_flags for each section */
#define DUMP_REQUEST_FLAG 0x1
/* Values stored in dump_section.source_type */
#define DUMP_SOURCE_CPU 0x0001
#define DUMP_SOURCE_HPTE 0x0002
#define DUMP_SOURCE_RMO 0x0011
/*
 * Bits tested against phyp_dump_header.status.  Only DUMP_ERROR_FLAG is
 * referenced by the code in this file; the other two are presumably
 * further status bits - TODO confirm against the platform documentation.
 */
#define DUMP_ERROR_FLAG 0x2000
#define DUMP_TRIGGERED 0x4000
#define DUMP_PERFORMED 0x8000
/**
* init_dump_header() - initialize the header declaring a dump
* Returns: length of dump save area.
*
* When the hypervisor saves crashed state, it needs to put
* it somewhere. The dump header tells the hypervisor where
* the data can be saved.
*/
static unsigned long init_dump_header(struct phyp_dump_header *ph)
{
unsigned long addr_offset = 0;
/* Set up the dump header */
ph->version = DUMP_HEADER_VERSION;
ph->num_of_sections = NUM_DUMP_SECTIONS;
ph->status = 0;
ph->first_offset_section =
(u32)offsetof(struct phyp_dump_header, cpu_data);
ph->dump_disk_section = 0;
ph->block_num_dd = 0;
ph->num_of_blocks_dd = 0;
ph->offset_dd = 0;
ph->maxtime_to_auto = 0; /* disabled */
/* The first two sections are mandatory */
ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
ph->cpu_data.source_type = DUMP_SOURCE_CPU;
ph->cpu_data.source_address = 0;
ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
ph->cpu_data.destination_address = addr_offset;
addr_offset += phyp_dump_info->cpu_state_size;
ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
ph->hpte_data.source_address = 0;
ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
ph->hpte_data.destination_address = addr_offset;
addr_offset += phyp_dump_info->hpte_region_size;
/* This section describes the low kernel region */
ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
ph->kernel_data.source_type = DUMP_SOURCE_RMO;
ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
ph->kernel_data.destination_address = addr_offset;
addr_offset += ph->kernel_data.source_length;
return addr_offset;
}
#ifdef DEBUG
/*
 * Print the fields of one dump section.  @title is used in the
 * " <title> AREA" heading, @tag prefixes every field line.
 */
static void print_dump_section(const char *title, const char *tag,
			       const struct dump_section *sec)
{
	printk(KERN_INFO " %s AREA\n", title);
	printk(KERN_INFO "%s dump_flags =%d\n", tag, sec->dump_flags);
	printk(KERN_INFO "%s source_type =%d\n", tag, sec->source_type);
	printk(KERN_INFO "%s error_flags =%d\n", tag, sec->error_flags);
	printk(KERN_INFO "%s source_address =%llx\n", tag,
	       sec->source_address);
	printk(KERN_INFO "%s source_length =%llx\n", tag,
	       sec->source_length);
	printk(KERN_INFO "%s length_copied =%llx\n", tag,
	       sec->length_copied);
}
#endif

/*
 * print_dump_header() - dump the contents of a phyp dump header to the log
 * @ph: header to print; NULL is tolerated and prints nothing
 *
 * Compiles to an empty function unless DEBUG is defined.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	if (ph == NULL)
		return;

	/* General header fields */
	printk(KERN_INFO "dump header:\n");
	printk(KERN_INFO "version = %d\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);
	printk(KERN_INFO "Offset to first section 0x%x\n",
	       ph->first_offset_section);

	/* No dump disk is used, so these are all expected to be zero */
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

	/* The three sections: CPU state, HPTE region, low kernel memory */
	print_dump_section("CPU", "cpu", &ph->cpu_data);
	print_dump_section("HPTE", "HPTE", &ph->hpte_data);
	print_dump_section("SRSD", "SRSD", &ph->kernel_data);
#endif
}
static ssize_t show_phyp_dump_active(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
/* create filesystem entry so kdump is phyp-dump aware */
return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
}
/*
 * "phyp_dump_active" attribute, mode 0600.  It has a show handler only
 * (store is NULL); register_dump_area() adds it under kernel_kobj after a
 * successful registration.
 */
static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
show_phyp_dump_active,
NULL);
static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
{
int rc;
/* Add addr value if not initialized before */
if (ph->cpu_data.destination_address == 0) {
ph->cpu_data.destination_address += addr;
ph->hpte_data.destination_address += addr;
ph->kernel_data.destination_address += addr;
}
/* ToDo Invalidate kdump and free memory range. */
do {
rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
1, ph, sizeof(struct phyp_dump_header));
} while (rtas_busy_delay(rc));
if (rc) {
printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
"register\n", rc);
print_dump_header(ph);
return;
}
rc = sysfs_create_file(kernel_kobj, &pdl.attr);
if (rc)
printk(KERN_ERR "phyp-dump: unable to create sysfs"
" file (%d)\n", rc);
}
static
void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
{
int rc;
/* Add addr value if not initialized before */
if (ph->cpu_data.destination_address == 0) {
ph->cpu_data.destination_address += addr;
ph->hpte_data.destination_address += addr;
ph->kernel_data.destination_address += addr;
}
do {
rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
2, ph, sizeof(struct phyp_dump_header));
} while (rtas_busy_delay(rc));
if (rc) {
printk(KERN_ERR "phyp-dump: unexpected error (%d) "
"on invalidate\n", rc);
print_dump_header(ph);
}
}
/* ------------------------------------------------- */
/**
 * release_memory_range -- release memory previously lmb_reserved
 * @start_pfn: starting physical frame number
 * @nr_pages: number of pages to free.
 *
 * This routine will release memory that had been previously
 * lmb_reserved in early boot. The released memory becomes
 * available for general use.
 *
 * Exactly @nr_pages frames, [@start_pfn, @start_pfn + @nr_pages), are
 * examined; frames that are not marked reserved are left alone.
 */
static void release_memory_range(unsigned long start_pfn,
			unsigned long nr_pages)
{
	struct page *rpage;
	unsigned long end_pfn;
	unsigned long pfn;

	/* end_pfn is exclusive: it is the first frame past the range */
	end_pfn = start_pfn + nr_pages;

	/*
	 * Use '<', not '<=': including end_pfn would touch nr_pages + 1
	 * frames and could free a reserved page belonging to someone else.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		rpage = pfn_to_page(pfn);
		if (PageReserved(rpage)) {
			ClearPageReserved(rpage);
			init_page_count(rpage);
			__free_page(rpage);
			totalram_pages++;
		}
	}
}
/**
* track_freed_range -- Counts the range being freed.
* Once the counter goes to zero, it re-registers dump for
* future use.
*/
static void
track_freed_range(unsigned long addr, unsigned long length)
{
static unsigned long scratch_area_size, reserved_area_size;
if (addr < phyp_dump_info->init_reserve_start)
return;
if ((addr >= phyp_dump_info->init_reserve_start) &&
(addr <= phyp_dump_info->init_reserve_start +
phyp_dump_info->init_reserve_size))
reserved_area_size += length;
if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
(addr <= phyp_dump_info->reserved_scratch_addr +
phyp_dump_info->reserved_scratch_size))
scratch_area_size += length;
if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
(scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
invalidate_last_dump(&phdr,
phyp_dump_info->reserved_scratch_addr);
register_dump_area(&phdr,
phyp_dump_info->reserved_scratch_addr);
}
}
/* ------------------------------------------------- */
/**
 * store_release_region -- sysfs interface to release memory range.
 * @kobj: unused
 * @attr: unused
 * @buf: user input, "<start addr> <length>" in hex
 * @count: number of bytes in @buf
 *
 * Usage:
 * "echo <start addr> <length> > /sys/kernel/release_region"
 *
 * Example:
 * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
 *
 * will release 256MB starting at 1GB.
 *
 * The request is clamped to the memory reserved for phyp-dump: a start
 * below the reserved region is raised to its beginning, and the length is
 * cut so the range never extends past its end.  A request that starts at
 * or beyond the end of the reserved region releases nothing.
 *
 * Returns @count on success, -EINVAL if the input cannot be parsed.
 */
static ssize_t store_release_region(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	unsigned long start_addr, length, end_addr;
	unsigned long start_pfn, nr_pages;
	int ret;

	ret = sscanf(buf, "%lx %lx", &start_addr, &length);
	if (ret != 2)
		return -EINVAL;

	/* Accounting uses the request as given, before any clamping */
	track_freed_range(start_addr, length);

	/* Range-check - don't free any reserved memory that
	 * wasn't reserved for phyp-dump */
	if (start_addr < phyp_dump_info->init_reserve_start)
		start_addr = phyp_dump_info->init_reserve_start;

	end_addr = phyp_dump_info->init_reserve_start +
			phyp_dump_info->init_reserve_size;

	/*
	 * Nothing of ours lies at or beyond end_addr.  Bail out here:
	 * computing end_addr - start_addr below would otherwise underflow
	 * and turn into an enormous length.
	 */
	if (start_addr >= end_addr)
		return count;

	/*
	 * Compare against the remaining room rather than computing
	 * start_addr + length, which could wrap around for a huge length.
	 */
	if (length > end_addr - start_addr)
		length = end_addr - start_addr;

	/* Release the region of memory passed in by user */
	start_pfn = PFN_DOWN(start_addr);
	nr_pages = PFN_DOWN(length);
	release_memory_range(start_pfn, nr_pages);

	return count;
}
static ssize_t show_release_region(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
u64 second_addr_range;
/* total reserved size - start of scratch area */
second_addr_range = phyp_dump_info->init_reserve_size -
phyp_dump_info->reserved_scratch_size;
return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
" DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
phdr.cpu_data.destination_address,
phdr.cpu_data.length_copied,
phdr.hpte_data.destination_address,
phdr.hpte_data.length_copied,
phdr.kernel_data.destination_address,
phdr.kernel_data.length_copied,
phyp_dump_info->init_reserve_start,
second_addr_range);
}
/*
 * "release_region" attribute, mode 0600: reading lists the dump regions,
 * writing "<start> <length>" releases memory.  phyp_dump_setup() adds it
 * under kernel_kobj when dump data from a previous boot is present.
 */
static struct kobj_attribute rr = __ATTR(release_region, 0600,
show_release_region,
store_release_region);
/*
 * phyp_dump_setup() - late-boot half of phyp-dump initialisation
 *
 * Does nothing (returns 0) when no memory was reserved in early boot, and
 * returns -ENOSYS when phyp dump is not configured.  Otherwise:
 *  - with no dump waiting, registers a fresh dump area;
 *  - with a dump flagged as erroneous, invalidates it and re-registers;
 *  - with a valid dump waiting, records where it lives and exposes the
 *    "release_region" sysfs file so the memory can be handed back.
 * All of these paths return 0.
 */
static int __init phyp_dump_setup(void)
{
	const struct phyp_dump_header *old_dump = NULL;
	struct device_node *rtas_node;
	unsigned long area_start;
	int prop_len = 0;
	int err;

	/* If no memory was reserved in early boot, there is nothing to do */
	if (phyp_dump_info->init_reserve_size == 0)
		return 0;

	/* Return if phyp dump not supported */
	if (!phyp_dump_info->phyp_dump_configured)
		return -ENOSYS;

	/*
	 * Is there dump data waiting for us?  The /rtas/ibm,kernel-dump
	 * property is present only if there is.
	 */
	rtas_node = of_find_node_by_path("/rtas");
	if (rtas_node) {
		old_dump = of_get_property(rtas_node, "ibm,kernel-dump",
					   &prop_len);
		of_node_put(rtas_node);
	}

	ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");

	print_dump_header(old_dump);

	/* Prepare our own header; the returned length is not needed here */
	init_dump_header(&phdr);

	/* align down */
	area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;

	/*
	 * No dump, or an invalid one: (re-)register the dump area.  An
	 * invalid old dump has to be invalidated first.
	 */
	if (!old_dump || (old_dump->status & DUMP_ERROR_FLAG)) {
		if (old_dump)
			invalidate_last_dump(&phdr, area_start);
		register_dump_area(&phdr, area_start);
		return 0;
	}

	/* A valid dump is waiting: remember where the scratch area is */
	phyp_dump_info->reserved_scratch_addr =
				old_dump->cpu_data.destination_address;
	phyp_dump_info->reserved_scratch_size =
				old_dump->cpu_data.source_length +
				old_dump->hpte_data.source_length +
				old_dump->kernel_data.source_length;

	/* Should we create a dump_subsys, analogous to s390/ipl.c ? */
	err = sysfs_create_file(kernel_kobj, &rr.attr);
	if (err)
		printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
		       err);

	/* ToDo: re-register the dump area, for next time. */
	return 0;
}
machine_subsys_initcall(pseries, phyp_dump_setup);
/*
 * early_init_dt_scan_phyp_dump() - flat device tree callback for phyp-dump
 * @node:  flat device tree node being visited
 * @uname: name of that node
 * @depth: depth of that node in the tree
 * @data:  unused
 *
 * Looks at the depth-1 "rtas" node and records whether kernel dump
 * configuration is supported, whether a dump is active, and the sizes of
 * the CPU state and HPTE save areas.
 *
 * Note that the configured/active flags are cleared on every invocation,
 * i.e. for every node visited, not only for "rtas".
 *
 * Returns 1 once the rtas node's sizes property has been processed, 0 for
 * any other node or when that property is absent.
 */
int __init early_init_dt_scan_phyp_dump(unsigned long node,
		const char *uname, int depth, void *data)
{
	const unsigned int *sizes;
	unsigned long val;

	phyp_dump_info->phyp_dump_configured = 0;
	phyp_dump_info->phyp_dump_is_active = 0;

	if (depth != 1 || strcmp(uname, "rtas") != 0)
		return 0;

	if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
		phyp_dump_info->phyp_dump_configured++;

	/*
	 * NOTE(review): this tests "ibm,dump-kernel" whereas
	 * phyp_dump_setup() reads "ibm,kernel-dump" - confirm which
	 * property name is intended.
	 */
	if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
		phyp_dump_info->phyp_dump_is_active++;

	sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
				    NULL);
	if (!sizes)
		return 0;

	/*
	 * The property is indexed as 32-bit cells: an id cell followed by a
	 * size that starts on the next cell.  The size is therefore only
	 * 4-byte aligned, so copy it out with memcpy() instead of
	 * dereferencing a cast pointer.
	 */
	if (sizes[0] == 1) {
		memcpy(&val, &sizes[1], sizeof(val));
		phyp_dump_info->cpu_state_size = val;
	}

	if (sizes[3] == 2) {
		memcpy(&val, &sizes[4], sizeof(val));
		phyp_dump_info->hpte_region_size = val;
	}

	return 1;
}
/*
 * Look for phyp_dump= cmdline option.
 *
 * phyp_dump_at_boot is set to 1 whenever the option is seen; it is
 * cleared only when the value begins with '0'.  Always returns 0.
 */
static int __init early_phyp_dump_enabled(char *p)
{
	int enable = 1;

	/* Only a leading '0' turns it off; anything else leaves it on */
	if (p && p[0] == '0')
		enable = 0;

	phyp_dump_info->phyp_dump_at_boot = enable;

	return 0;
}
early_param("phyp_dump", early_phyp_dump_enabled);
/*
 * Look for phyp_dump_reserve_size= cmdline option.
 *
 * The value is parsed with memparse() and stored in reserve_bootvar.
 * With no value given nothing is changed.  Always returns 0.
 */
static int __init early_phyp_dump_reserve_size(char *p)
{
	if (!p)
		return 0;

	phyp_dump_info->reserve_bootvar = memparse(p, &p);

	return 0;
}
early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);
|