Index: sys/arch/x86/x86/x86_machdep.c =================================================================== RCS file: /cvsroot/src/sys/arch/x86/x86/x86_machdep.c,v retrieving revision 1.27 diff -u -p -r1.27 x86_machdep.c --- sys/arch/x86/x86/x86_machdep.c 15 Dec 2008 22:20:52 -0000 1.27 +++ sys/arch/x86/x86/x86_machdep.c 15 Dec 2008 23:36:26 -0000 @@ -46,6 +46,7 @@ __KERNEL_RCSID(0, "$NetBSD: x86_machdep. #include #include #include +#include #include #include @@ -111,6 +112,9 @@ check_pa_acc(paddr_t pa, vm_prot_t prot) const phys_ram_seg_t *seg = &mem_clusters[i]; paddr_t lstart = seg->start; + if (seg->type != PMEM_U_RAM) + continue; + if (lstart <= pa && pa - lstart <= seg->size) { return 0; } @@ -120,6 +124,9 @@ check_pa_acc(paddr_t pa, vm_prot_t prot) KAUTH_MACHDEP_UNMANAGEDMEM, NULL, NULL, NULL, NULL); } + +#define DEBUG_MEMLOAD + /* * This function is to initialize the mutex used by x86/msr_ipifuncs.c. */ @@ -411,7 +418,7 @@ add_mem_cluster(phys_ram_seg_t *seg_clus * the addresses are page rounded just to make * sure we get them all. */ - if (seg_start < 0x100000000ULL) { + if (type != PMEM_U_UNKNOWN && seg_start < 0x100000000ULL) { uint64_t io_end; if (seg_end > 0x100000000ULL) @@ -430,12 +437,6 @@ add_mem_cluster(phys_ram_seg_t *seg_clus } } - /* - * If it's not free memory, skip it. - */ - if (type != BIM_Memory) - return seg_cluster_cnt; - /* XXX XXX XXX */ if (seg_cluster_cnt >= VM_PHYSSEG_MAX) panic("%s: too many memory segments (increase VM_PHYSSEG_MAX)", @@ -456,23 +457,25 @@ add_mem_cluster(phys_ram_seg_t *seg_clus cluster = &seg_clusters[seg_cluster_cnt]; cluster->start = seg_start; - if (iomem_ex != NULL) + if (type == PMEM_U_RAM && iomem_ex != NULL) new_physmem = physmem + atop(seg_end - seg_start); #ifdef PHYSMEM_MAX_SIZE - if (iomem_ex != NULL) { + if (type == PMEM_U_RAM && iomem_ex != NULL) { if (physmem >= atop(MBTOB(PHYSMEM_MAX_SIZE))) return seg_cluster_cnt; if (new_physmem > atop(MBTOB(PHYSMEM_MAX_SIZE))) { - seg_end = seg_start + MBTOB(PHYSMEM_MAX_SIZE) - ptoa(physmem); + seg_end = seg_start + MBTOB(PHYSMEM_MAX_SIZE) + - ptoa(physmem); new_physmem = atop(MBTOB(PHYSMEM_MAX_SIZE)); } } #endif cluster->size = seg_end - seg_start; + cluster->type = type; - if (iomem_ex != NULL) { + if (type == PMEM_U_RAM && iomem_ex != NULL) { if (avail_end < seg_end) avail_end = seg_end; physmem = new_physmem; @@ -505,13 +508,16 @@ initx86_parse_memmap(struct btinfo_memma addr, size, type); #endif - /* - * If the segment is not memory, skip it. 
- */ switch (type) { case BIM_Memory: + type = PMEM_U_RAM; + break; + case BIM_Reserved: + type = PMEM_U_UNKNOWN; + break; case BIM_ACPI: case BIM_NVS: + type = PMEM_U_FIRMWARE; break; default: continue; @@ -546,6 +552,9 @@ initx86_parse_memmap(struct btinfo_memma mem_clusters, mem_cluster_cnt, iomem_ex, seg_start, 0xa0000, type); mem_cluster_cnt = add_mem_cluster( + mem_clusters, mem_cluster_cnt, NULL, + 0xa0000, 0x100000, PMEM_U_ROM); + mem_cluster_cnt = add_mem_cluster( mem_clusters, mem_cluster_cnt, iomem_ex, 0x100000, seg_end, type); } else @@ -579,6 +588,7 @@ initx86_fake_memmap(struct extent *iomem cluster = &mem_clusters[0]; cluster->start = 0; cluster->size = trunc_page(KBTOB(biosbasemem)); + cluster->type = PMEM_U_RAM; physmem += atop(cluster->size); if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem), @@ -614,6 +624,7 @@ initx86_fake_memmap(struct extent *iomem cluster = &mem_clusters[1]; cluster->start = IOM_END; cluster->size = trunc_page(KBTOB(biosextmem)); + cluster->type = PMEM_U_RAM; physmem += atop(cluster->size); mem_cluster_cnt = 2; @@ -633,6 +644,7 @@ initx86_load_memmap(paddr_t first_avail) { uint64_t seg_start, seg_end; uint64_t seg_start1, seg_end1; + uint32_t seg_type; int first16q, x; /* @@ -666,6 +678,7 @@ initx86_load_memmap(paddr_t first_avail) for (x = 0; x < mem_cluster_cnt; x++) { const phys_ram_seg_t *cluster = &mem_clusters[x]; + seg_type = cluster->type; seg_start = cluster->start; seg_end = cluster->start + cluster->size; seg_start1 = 0; @@ -694,6 +707,9 @@ initx86_load_memmap(paddr_t first_avail) seg_end1 = seg_end; seg_end = IOM_END; KASSERT(seg_end < seg_end1); + + pmem_region_create(IOM_END, first_avail, + PMEM_U_TEXT, PMEM_PROT_UNKNOWN, PMEM_P_UNKNOWN); } /* First hunk */ @@ -708,29 +724,46 @@ initx86_load_memmap(paddr_t first_avail) tmp = seg_end; if (tmp != seg_start) { + pmem_region_create(seg_start, tmp, + seg_type, + PMEM_PROT_UNKNOWN, + PMEM_P_UNKNOWN); + + if (seg_type == PMEM_U_RAM) { #ifdef DEBUG_MEMLOAD - printf("loading 0x%"PRIx64"-0x%"PRIx64 - " (0x%lx-0x%lx)\n", - seg_start, tmp, - atop(seg_start), atop(tmp)); -#endif - uvm_page_physload(atop(seg_start), - atop(tmp), atop(seg_start), - atop(tmp), first16q); + printf("loading 0x%"PRIx64"-0x%"PRIx64 + " (0x%lx-0x%lx), 0x%x\n", + seg_start, tmp, + atop(seg_start), atop(tmp), + seg_type); +#endif + uvm_page_physload( + atop(seg_start), atop(tmp), + atop(seg_start), atop(tmp), + first16q); + } } seg_start = tmp; } if (seg_start != seg_end) { + pmem_region_create(seg_start, seg_end, + seg_type, + PMEM_PROT_UNKNOWN, + PMEM_P_UNKNOWN); + + if (seg_type == PMEM_U_RAM) { #ifdef DEBUG_MEMLOAD - printf("loading 0x%"PRIx64"-0x%"PRIx64 - " (0x%lx-0x%lx)\n", - seg_start, seg_end, - atop(seg_start), atop(seg_end)); -#endif - uvm_page_physload(atop(seg_start), - atop(seg_end), atop(seg_start), - atop(seg_end), VM_FREELIST_DEFAULT); + printf("loading 0x%"PRIx64"-0x%"PRIx64 + " (0x%lx-0x%lx), 0x%x\n", + seg_start, seg_end, + atop(seg_start), atop(seg_end), + seg_type); +#endif + uvm_page_physload(atop(seg_start), + atop(seg_end), atop(seg_start), + atop(seg_end), VM_FREELIST_DEFAULT); + } } } @@ -746,29 +779,46 @@ initx86_load_memmap(paddr_t first_avail) tmp = seg_end1; if (tmp != seg_start1) { + pmem_region_create(seg_start1, tmp, + seg_type, + PMEM_PROT_UNKNOWN, + PMEM_P_UNKNOWN); + + if (seg_type == PMEM_U_RAM) { #ifdef DEBUG_MEMLOAD - printf("loading 0x%"PRIx64"-0x%"PRIx64 - " (0x%lx-0x%lx)\n", - seg_start1, tmp, - atop(seg_start1), atop(tmp)); -#endif - uvm_page_physload(atop(seg_start1), - 
atop(tmp), atop(seg_start1), - atop(tmp), first16q); + printf("loading 0x%"PRIx64"-0x%"PRIx64 + " (0x%lx-0x%lx), 0x%x\n", + seg_start1, tmp, + atop(seg_start1), atop(tmp), + seg_type); +#endif + uvm_page_physload( + atop(seg_start1), atop(tmp), + atop(seg_start1), atop(tmp), + first16q); + } } seg_start1 = tmp; } if (seg_start1 != seg_end1) { + pmem_region_create(seg_start1, seg_end1, + seg_type, + PMEM_PROT_UNKNOWN, + PMEM_P_UNKNOWN); + + if (seg_type == PMEM_U_RAM) { #ifdef DEBUG_MEMLOAD - printf("loading 0x%"PRIx64"-0x%"PRIx64 - " (0x%lx-0x%lx)\n", - seg_start1, seg_end1, - atop(seg_start1), atop(seg_end1)); -#endif - uvm_page_physload(atop(seg_start1), - atop(seg_end1), atop(seg_start1), - atop(seg_end1), VM_FREELIST_DEFAULT); + printf("loading 0x%"PRIx64"-0x%"PRIx64 + " (0x%lx-0x%lx), 0x%x\n", + seg_start1, seg_end1, + atop(seg_start1), atop(seg_end1), + seg_type); +#endif + uvm_page_physload(atop(seg_start1), + atop(seg_end1), atop(seg_start1), + atop(seg_end1), VM_FREELIST_DEFAULT); + } } } } Index: sys/conf/files =================================================================== RCS file: /cvsroot/src/sys/conf/files,v retrieving revision 1.930 diff -u -p -r1.930 files --- sys/conf/files 11 Dec 2008 05:42:18 -0000 1.930 +++ sys/conf/files 15 Dec 2008 23:36:27 -0000 @@ -1443,6 +1443,7 @@ file kern/subr_kobj.c file kern/subr_lockdebug.c file kern/subr_log.c file kern/subr_percpu.c +file kern/subr_pmem.c file kern/subr_pool.c file kern/subr_prf.c file kern/subr_prf2.c Index: sys/kern/subr_pmem.c =================================================================== RCS file: sys/kern/subr_pmem.c diff -N sys/kern/subr_pmem.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/kern/subr_pmem.c 15 Dec 2008 23:36:28 -0000 @@ -0,0 +1,733 @@ +/* $NetBSD: $ */ + +/* + * Copyright (c) 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christoph Egger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD: $"); + +#include +#include +#include +#include +#include +#include /* for "cold" */ + +#include /* for PAGE_SIZE */ + +/* structs */ + +#define PMEM_STORAGE 0 +#define PMEM_STORAGE_STATIC 1 + +struct pmem_dimm { + struct pmem_arena *d_pmarena; /* pointer to pmem_arena */ + vmem_t *d_vmem; + vmem_addr_t d_vmemaddr; + TAILQ_ENTRY(pmem_dimm) d_entry; + TAILQ_ENTRY(pmem_dimm) d_arena_entry; + + struct pmem_dimm_spec d_spec; + uint32_t d_storagetype; +}; + +struct pmem_phys_region { + struct pmem_arena *r_pmarena; /* pointer to pmem_arena */ + vmem_t *r_vmem; + vmem_addr_t r_vmemaddr; + TAILQ_ENTRY(pmem_phys_region) r_entry; + TAILQ_ENTRY(pmem_phys_region) r_arena_entry; + + struct pmem_region_spec r_spec; + uint32_t r_storagetype; +}; + +struct pmem_arena { + paddr_t pa_start; + paddr_t pa_end; + + struct numa_info *pa_numainfo; + + TAILQ_ENTRY(pmem_arena) pa_entry; + + TAILQ_HEAD(, pmem_dimm) pa_dimms; + TAILQ_HEAD(, pmem_phys_region) pa_regions; + uint32_t pa_storagetype; +}; + +/* static storage for early bootstrapping */ + +#ifndef PMEM_DIMM_STORAGE +#define PMEM_DIMM_STORAGE 4 +#endif +static struct pmem_dimm pmem_dimm_storage[PMEM_DIMM_STORAGE]; +static int pmem_ndimms = 0; + +static struct pmem_phys_region pmem_phys_region_storage[VM_PHYSSEG_MAX]; +static int pmem_nphys_regions = 0; + +#ifndef PMEM_ARENA_STORAGE +#define PMEM_ARENA_STORAGE 1 +#endif +static struct pmem_arena pmem_arena_storage[PMEM_ARENA_STORAGE]; +static int pmem_narenas = 0; + +/* lists */ + +static TAILQ_HEAD(pmem_dimm_head, pmem_dimm) pmem_dimm_head = + TAILQ_HEAD_INITIALIZER(pmem_dimm_head); + +static TAILQ_HEAD(pmem_region_head, pmem_phys_region) pmem_region_head = + TAILQ_HEAD_INITIALIZER(pmem_region_head); + +static TAILQ_HEAD(pmem_arena_head, pmem_arena) pmem_arena_head = + TAILQ_HEAD_INITIALIZER(pmem_arena_head); + +/* macros */ + +#define DIMM_ADD(dimm) \ + TAILQ_INSERT_TAIL(&(pmem_dimm_head), (dimm), d_entry) +#define DIMM_REMOVE(dimm) \ + TAILQ_REMOVE(&(pmem_dimm_head), (dimm), d_entry) +#define DIMM_FOREACH(dimm) \ + TAILQ_FOREACH((dimm), &(pmem_dimm_head), d_entry) + +#define REGION_ADD(region) \ + TAILQ_INSERT_TAIL(&(pmem_region_head), (region), r_entry) +#define REGION_REMOVE(region) \ + TAILQ_REMOVE(&(pmem_region_head), (region), r_entry) +#define REGION_FOREACH(idx) \ + TAILQ_FOREACH((idx), &(pmem_region_head), r_entry) +#define REGION_FIRST \ + TAILQ_FIRST(&(pmem_region_head)) +#define REGION_NEXT(region) \ + TAILQ_NEXT((region), r_entry) + + +#define ARENA_ADD(arena) \ + TAILQ_INSERT_TAIL(&(pmem_arena_head), (arena), pa_entry) +#define ARENA_REMOVE(arena) \ + TAILQ_REMOVE(&(pmem_arena_head), (arena), pa_entry) +#define ARENA_FOREACH(arena) \ + TAILQ_FOREACH((arena), &(pmem_arena_head), pa_entry) + + +#define ARENA_DIMM_INIT(arena) \ + TAILQ_INIT(&(arena)->pa_dimms) +#define ARENA_DIMM_ADD(arena, dimm) \ + TAILQ_INSERT_TAIL(&(arena)->pa_dimms, (dimm), d_arena_entry) +#define ARENA_DIMM_REMOVE(arena, dimm) \ + TAILQ_REMOVE(&(arena)->pa_dimms, (dimm), d_arena_entry) +#define ARENA_DIMM_EMPTY(arena) \ + TAILQ_EMPTY(&(arena)->pa_dimms) +#define ARENA_DIMM_FOREACH(arena, dimm) \ + TAILQ_FOREACH((dimm), &(arena)->pa_dimms, d_arena_entry) + +#define ARENA_REGION_INIT(arena) \ + TAILQ_INIT(&(arena)->pa_regions) +#define ARENA_REGION_ADD(arena, region) \ + TAILQ_INSERT_TAIL(&(arena)->pa_regions, region, r_arena_entry) +#define ARENA_REGION_REMOVE(arena, region) \ + TAILQ_REMOVE(&(arena)->pa_regions, region, r_arena_entry) +#define ARENA_REGION_EMPTY(arena) \ + 
TAILQ_EMPTY(&(arena)->pa_regions) +#define ARENA_REGION_FOREACH(arena, region) \ + TAILQ_FOREACH((region), &(arena)->pa_regions, r_arena_entry) + + +/* internal */ + +static struct pmem_dimm * +pmem_dimm_alloc(paddr_t start, paddr_t end, + pmem_type_t type, uint32_t serial) +{ + struct pmem_dimm *dimm; + + KASSERT(start < end); + + if (cold && pmem_ndimms < PMEM_DIMM_STORAGE) { + /* preload */ + dimm = &pmem_dimm_storage[pmem_ndimms]; + dimm->d_storagetype = PMEM_STORAGE_STATIC; + pmem_ndimms++; + } else { + dimm = kmem_zalloc(sizeof(struct pmem_dimm), KM_NOSLEEP); + if (dimm == NULL) + return NULL; + dimm->d_storagetype = PMEM_STORAGE; + } + + KASSERT(dimm != NULL); + dimm->d_spec.d_start = start; + dimm->d_spec.d_end = end; + dimm->d_spec.d_type = type; + dimm->d_spec.d_serial = serial; + + return dimm; +} + +static struct pmem_phys_region * +pmem_region_alloc(paddr_t start, paddr_t end, + pmem_type_t type, pmem_prot_t prot, pmem_props_t props) +{ + struct pmem_phys_region *region; + + KASSERT(start < end); + + if (cold && pmem_nphys_regions < VM_PHYSSEG_MAX) { + /* preload */ + region = &pmem_phys_region_storage[pmem_nphys_regions]; + region->r_storagetype = PMEM_STORAGE_STATIC; + pmem_nphys_regions++; + } else { + region = kmem_zalloc(sizeof(struct pmem_phys_region), + KM_NOSLEEP); + if (region == NULL) + return NULL; + region->r_storagetype = PMEM_STORAGE; + } + + KASSERT(region != NULL); + region->r_spec.r_start = start; + region->r_spec.r_end = end; + region->r_spec.r_type = type; + region->r_spec.r_prot = prot; + region->r_spec.r_props = props; + + return region; +} + +static bool +pmem_region_match(const struct pmem_phys_region *region, + paddr_t start, paddr_t end) +{ + KASSERT(region != NULL); + KASSERT(start < end); + + if (region->r_spec.r_end <= start) + return false; + if (region->r_spec.r_start >= end) + return false; + + return true; +} + +static struct pmem_phys_region * +pmem_region_search(const struct pmem_phys_region *region, + paddr_t start, paddr_t end) +{ + struct pmem_phys_region *reg; + + KASSERT(start < end); + + if (region == NULL) + reg = REGION_FIRST; + else + reg = REGION_NEXT(region); + + while (reg != NULL) { + if (pmem_region_match(reg, start, end)) + return reg; + + reg = REGION_NEXT(reg); + } + + return NULL; +} + +static struct pmem_arena * +pmem_arena_alloc(paddr_t start, paddr_t end) +{ + struct pmem_arena *arena; + + KASSERT(start < end || (start == 0 && end == 0)); + + if (pmem_narenas < PMEM_ARENA_STORAGE) { + /* preload */ + arena = &pmem_arena_storage[pmem_narenas]; + arena->pa_storagetype = PMEM_STORAGE_STATIC; + pmem_narenas++; + } else { + arena = kmem_zalloc(sizeof(struct pmem_arena), + KM_NOSLEEP); + if (arena == NULL) + return NULL; + arena->pa_storagetype = PMEM_STORAGE; + } + + KASSERT(arena != NULL); + arena->pa_start = start; + arena->pa_end = end; + ARENA_DIMM_INIT(arena); + ARENA_REGION_INIT(arena); + + return arena; +} + +static int +pmem_arena_add_region(struct pmem_arena *arena, + struct pmem_phys_region *region) +{ + int error; + + KASSERT(arena != NULL); + KASSERT(region != NULL); + + KASSERT(region != NULL); + region->r_vmem = vmem_create("pmem_region", + region->r_spec.r_start, + region->r_spec.r_end - region->r_spec.r_start, + PAGE_SIZE, + NULL, NULL, + NULL, /* vmem backend */ + 0, /* qcache_max */ + VM_NOSLEEP, IPL_NONE); + if (region->r_vmem == NULL) { + error = ENOMEM; + goto err0; + } + + region->r_pmarena = arena; + ARENA_REGION_ADD(arena, region); + + /* XXX register callback handler */ + + return 0; + +err0: + return 
error;
+}
+
+/* API */
+
+/* Load physical addresses [start, end) having the given default properties.
+ */
+int
+pmem_region_create(paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props)
+{
+	struct pmem_phys_region *region;
+
+	KASSERT(start < end);
+
+	region = pmem_region_alloc(start, end, type, prot, props);
+	if (region == NULL)
+		return ENOMEM;
+
+	REGION_ADD(region);
+
+	return 0;
+}
+
+/* Connect loaded physical addresses with this arena. */
+int
+pmem_arena_add_regions(struct pmem_arena *arena)
+{
+	int error = 0;
+	struct pmem_phys_region *region;
+
+	KASSERT(arena != NULL);
+
+	REGION_FOREACH(region) {
+		/* already assigned? */
+		if (region->r_pmarena != NULL)
+			continue;
+		KASSERT(region->r_vmem == NULL);
+		if (region->r_spec.r_start < arena->pa_start)
+			continue;
+		if (region->r_spec.r_end > arena->pa_end)
+			continue;
+
+		error = pmem_arena_add_region(arena, region);
+		if (error)
+			goto out;
+	}
+
+out:
+	return error;
+}
+
+int
+pmem_arena_add_dimm(struct pmem_arena *arena,
+    paddr_t start, paddr_t end, pmem_type_t type, uint32_t serial)
+{
+	int error;
+	struct pmem_dimm *dimm;
+
+	KASSERT(arena != NULL);
+	KASSERT(end > start);
+
+	dimm = pmem_dimm_alloc(start, end, type, serial);
+	if (dimm == NULL) {
+		error = ENOMEM;
+		goto err0;
+	}
+
+	dimm->d_pmarena = arena;
+
+	DIMM_ADD(dimm);
+	ARENA_DIMM_ADD(arena, dimm);
+
+	/* XXX register callback handler */
+
+	return 0;
+
+err0:
+	return error;
+}
+
+struct pmem_arena *
+pmem_arena_create(struct numa_info *ni, paddr_t start, paddr_t end)
+{
+	struct pmem_arena *arena;
+
+	KASSERT(start < end);
+	KASSERT(ni != NULL);
+
+	arena = pmem_arena_alloc(start, end);
+	if (arena == NULL)
+		return NULL;
+
+	arena->pa_numainfo = ni;
+	ARENA_ADD(arena);
+
+	return arena;
+}
+
+/*
+ * Set arena size indirectly.
+ * Only for bootstrapping code.
+ */
+int
+pmem_arena_loadrange(struct pmem_arena *arena, paddr_t start, paddr_t end,
+    pmem_type_t type)
+{
+	struct pmem_phys_region *region = NULL;
+
+	KASSERT(arena != NULL);
+	KASSERT(start < end);
+	KASSERT(cold);	/* this is only for bootstrapping */
+
+	if (start < arena->pa_start)
+		arena->pa_start = start;
+	if (end > arena->pa_end)
+		arena->pa_end = end;
+
+	if (type == PMEM_U_UNKNOWN)
+		return 0;
+
+	while ((region = pmem_region_search(region, start, end)) != NULL) {
+		if (region->r_spec.r_type != PMEM_U_UNKNOWN)
+			continue;
+
+		region->r_spec.r_type = type;
+	}
+
+	return 0;
+}
+
+int
+pmem_arena_prime(struct pmem_arena *arena, paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props)
+{
+	int error;
+	struct pmem_phys_region *region;
+
+	KASSERT(arena != NULL);
+	KASSERT(start < end);
+
+	region = pmem_region_alloc(start, end, type, prot, props);
+	if (region == NULL) {
+		error = ENOMEM;
+		goto err0;
+	}
+
+	error = pmem_arena_add_region(arena, region);
+	if (error)
+		goto err1;
+
+	REGION_ADD(region);
+	return 0;
+
+err1:
+	kmem_free(region, sizeof(struct pmem_phys_region));
+err0:
+	return error;
+}
+
+/* Connect two arenas. */
+int
+pmem_arena_connect(struct pmem_arena *left, struct pmem_arena *right,
+    struct pmem_mapping *m, pmem_metric_t metric)
+{
+	KASSERT(left != NULL);
+	KASSERT(right != NULL);
+
+	return 0;
+}
+
+/* Reserve a region in arena `a' that meets the given criteria.
+ * The region is returned with a reference count of at least 1.
+ */ +struct pmem_region * +pmem_alloc(struct pmem_arena *arena, paddr_t minaddr, paddr_t maxaddr, + pmem_prot_t prot, pmem_props_t props, pmem_type_t type, + size_t align, size_t phase, size_t size, size_t nocross, + vm_flag_t flags, pmem_metric_t maxmetric) +{ + struct pmem_region *r = NULL; + struct pmem_phys_region *tmp; + vmem_addr_t addr; + + KASSERT(arena != NULL); + KASSERT(maxaddr > minaddr); + KASSERT(size > 0); + KASSERT((maxaddr - minaddr) >= size); + KASSERT((flags & (VM_BESTFIT|VM_INSTANTFIT)) != 0); + KASSERT((~flags & (VM_BESTFIT|VM_INSTANTFIT)) != 0); + + /* Clip search area to the arena. */ + if (arena->pa_start > minaddr) + minaddr = arena->pa_start; + if (arena->pa_end < maxaddr) + maxaddr = arena->pa_end; + + /* Check if this arena is large enough */ + if ((maxaddr - minaddr) < size) + return NULL; + + /* Check bounds */ + if (arena->pa_start >= maxaddr) + return NULL; + if (arena->pa_end <= minaddr) + return NULL; + + r = kmem_zalloc(sizeof(struct pmem_region), KM_NOSLEEP); + if (r == NULL) + return NULL; + + ARENA_REGION_FOREACH(arena, tmp) { + if (!pmem_region_match(tmp, minaddr, maxaddr)) + continue; + + if (tmp->r_spec.r_prot != prot) + continue; + if (tmp->r_spec.r_props != props) + continue; + if (tmp->r_spec.r_type != type) + continue; + + addr = vmem_xalloc(tmp->r_vmem, size, align, phase, + nocross, minaddr, maxaddr, flags | VM_NOSLEEP); + if (addr == VMEM_ADDR_NULL) + continue; + + goto found; + } + + kmem_free(r, sizeof(struct pmem_region)); + return NULL; + +found: + r->r_spec.r_start = addr; + r->r_spec.r_end = addr + size; + r->r_spec.r_prot = tmp->r_spec.r_prot; + r->r_spec.r_props = tmp->r_spec.r_props; + r->r_spec.r_type = tmp->r_spec.r_type; + r->r_refcount = 1; + r->r_physregion = tmp; + + return r; +} + +int +pmem_free(struct pmem_region **r) +{ + struct pmem_region *r1; + KASSERT(r != NULL); + KASSERT(*r != NULL); + + r1 = *r; + + /* still referenced by others */ + if (r1->r_refcount > 1) + return EBUSY; + + vmem_xfree(r1->r_physregion->r_vmem, r1->r_spec.r_start, + r1->r_spec.r_end - r1->r_spec.r_start); + kmem_free(r1, sizeof(struct pmem_region)); + + *r = NULL; + return 0; +} + +/* Get/set properties on the region `r'. */ +int +pmem_get(struct pmem_region *r, pmem_prot_t *prot, pmem_props_t *props, + pmem_type_t *type) +{ + KASSERT(r != NULL); + KASSERT(prot != NULL); + KASSERT(props != NULL); + KASSERT(type != NULL); + + return 0; +} + +int +pmem_set(struct pmem_region *r, pmem_prot_t prot, pmem_props_t props, + pmem_type_t type) +{ + KASSERT(r != NULL); + + return 0; +} + +/* Count another reference to region `r'. 
*/ +void +pmem_incref(struct pmem_region *r) +{ + KASSERT(r != NULL); + +} + +void +pmem_decref(struct pmem_region *r) +{ + KASSERT(r != NULL); + +} + +struct pmem_region * +pmem_map(struct pmem_arena *arena, struct pmem_region *r, paddr_t *paddr) +{ + KASSERT(arena != NULL); + KASSERT(r != NULL); + KASSERT(paddr != NULL); + + return NULL; +} + +void +pmem_unmap(struct pmem_region *r) +{ + KASSERT(r != NULL); + +} + +/* dumps */ + +static void +pmem_region_print(int i, struct pmem_phys_region *region) +{ + printf( " region %i: 0x%"PRIx64" - 0x%"PRIx64 + " protection: 0x%x, properties: 0x%x, type: 0x%x\n", + i, + region->r_spec.r_start, + region->r_spec.r_end, + region->r_spec.r_prot, + region->r_spec.r_props, + region->r_spec.r_type); +} + +void +pmem_regions_dump(struct pmem_arena *arena) +{ + int i = 0; + struct pmem_phys_region *region; + + if (arena) { + ARENA_REGION_FOREACH(arena, region) { + pmem_region_print(i, region); + i++; + } + return; + } + + REGION_FOREACH(region) { + pmem_region_print(i, region); + i++; + } +} + +static void +pmem_dimm_print(int i, struct pmem_dimm *dimm) +{ + printf( " dimm %i: 0x%"PRIx64" - 0x%"PRIx64 + " type: 0x%x, serial: 0x%x\n", + i, + dimm->d_spec.d_start, + dimm->d_spec.d_end, + dimm->d_spec.d_type, + dimm->d_spec.d_serial); +} + +void +pmem_dimms_dump(struct pmem_arena *arena) +{ + int i = 0; + struct pmem_dimm *dimm; + + if (arena) { + ARENA_DIMM_FOREACH(arena, dimm) { + pmem_dimm_print(i, dimm); + i++; + } + return; + } + + DIMM_FOREACH(dimm) { + pmem_dimm_print(i, dimm); + i++; + } +} + +void +pmem_arenas_dump(bool dump_regions, bool dump_dimms) +{ + int i = 0; + struct pmem_arena *arena; + + ARENA_FOREACH(arena) { + printf("arena %i: 0x%"PRIx64" - 0x%"PRIx64"\n", + i, + arena->pa_start, + arena->pa_end); + + if (dump_regions) { + pmem_regions_dump(arena); + } + + if (dump_dimms) { + pmem_dimms_dump(arena); + } + i++; + } +} Index: sys/sys/kcore.h =================================================================== RCS file: /cvsroot/src/sys/sys/kcore.h,v retrieving revision 1.2 diff -u -p -r1.2 kcore.h --- sys/sys/kcore.h 26 Dec 2005 18:41:36 -0000 1.2 +++ sys/sys/kcore.h 15 Dec 2008 23:36:28 -0000 @@ -47,6 +47,7 @@ typedef struct { u_quad_t start; /* Physical start address */ u_quad_t size; /* Size in bytes */ + uint32_t type; /* pmem(9) type of space */ } phys_ram_seg_t; typedef struct kcore_hdr { Index: sys/sys/pmem.h =================================================================== RCS file: sys/sys/pmem.h diff -N sys/sys/pmem.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/pmem.h 15 Dec 2008 23:36:28 -0000 @@ -0,0 +1,247 @@ +/* $NetBSD: $ */ +/* + * Copyright (c) 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christoph Egger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_PMEM_H_
+#define _SYS_PMEM_H_
+
+#include
+#include
+
+enum pmem_type {
+	/* physical RAM types */
+	PMEM_T_NORMAL	= 0x0000,	/* normal */
+	PMEM_T_HOTSPARE	= 0x0001,	/* reserved to replace RAM that is
+					 * in a critical or defective state
+					 */
+	PMEM_T_CRITICAL	= 0x0002,	/* usable, but may become unusable,
+					 * e.g. if accessed too frequently
+					 * or if it cannot run at lower
+					 * power.
+					 */
+	PMEM_T_DEFECT	= 0x0004,	/* really broken, unusable */
+	PMEM_T_OFFLINE	= 0x0008,	/* unusable, but not necessarily
+					 * defective
+					 */
+	PMEM_T_SERIALNR	= 0x0010,	/* serial number is available */
+
+	PMEM_T_MASK	= 0xffff,	/* physical type mask */
+
+	/* logical *use* RAM types */
+	PMEM_U_UNKNOWN	= 0x00000000,	/* Unknown/reserved region found by
+					 * bootstrapping code; bus scanning
+					 * code/drivers need to figure out
+					 * what it is for and may change
+					 * this type.
+					 */
+	PMEM_U_TEXT	= 0x00010000,	/* Code */
+	PMEM_U_DMABUF	= 0x00020000,	/* DMA buffer */
+	PMEM_U_FIRMWARE	= 0x00040000,	/* Firmware data (e.g. ACPI) */
+	PMEM_U_RAM	= 0x00080000,	/* normal usable RAM */
+	PMEM_U_ROM	= 0x00100000,	/* any ROM */
+	PMEM_U_MMIO	= 0x00200000,	/* any MMIO (e.g. PCI memory) */
+
+	PMEM_U_MIRROR	= 0x00400000,	/* mirrors another range to provide
+					 * a valid copy in case of memory
+					 * errors during access.
+					 * This allows, for example, turning
+					 * an uncorrectable machine-check
+					 * error into a correctable one.
+					 */
+	PMEM_U_PTP	= 0x00800000,	/* Pagetable Pages (e.g. MMU, IOMMU) */
+	PMEM_U_MASK	= 0xffff0000,	/* logical use mask */
+};
+
+enum pmem_prot {			/* hardware implementation */
+	PMEM_PROT_UNKNOWN	= 0x00,
+	PMEM_PROT_READ		= 0x01,	/* PCI bus bridge, IOMMU */
+	PMEM_PROT_WRITE		= 0x02,	/* PCI bus bridge, IOMMU, MTRR,
+					 * AMD Elan SC520 PAR
+					 */
+	PMEM_PROT_EXEC		= 0x04,	/* AMD Elan SC520 PAR */
+};
+
+enum pmem_props {			/* hardware implementation */
+	PMEM_P_UNKNOWN	= 0x00,
+	PMEM_P_WTHRU	= 0x01,		/* MTRR */
+	PMEM_P_WBACK	= 0x02,		/* MTRR */
+	PMEM_P_WCOMB	= 0x04,		/* MTRR */
+	PMEM_P_UNCACHED	= 0x08,		/* MTRR, AMD Elan SC520 PAR */
+	PMEM_P_PREFETCH	= 0x10,		/* PCI bus bridge */
+	PMEM_P_32BIT	= 0x20,		/* 32-bit access */
+	PMEM_P_64BIT	= 0x40,		/* 64-bit access */
+	PMEM_P_DMA	= 0x80,		/* DMA-safe memory */
+};
+
+
+typedef enum pmem_type pmem_type_t;
+typedef enum pmem_prot pmem_prot_t;
+typedef enum pmem_props pmem_props_t;
+
+
+struct numa_info;
+struct pmem_arena;
+struct pmem_mapping;
+struct pmem_phys_region;
+
+/* Describes a memory DIMM that is physically present in the machine.
+ * Some of this information may be provided by MD bootstrap code;
+ * most of it may be provided by spdmem(4).
+ */ +struct pmem_dimm_spec { + paddr_t d_start; + paddr_t d_end; + pmem_type_t d_type; + uint32_t d_serial; /* DIMM serial number */ +}; + +struct pmem_region_spec { + paddr_t r_start; + paddr_t r_end; + pmem_prot_t r_prot; + pmem_props_t r_props; + pmem_type_t r_type; +}; + +struct pmem_region { + struct pmem_region_spec r_spec; + + u_int r_refcount; + struct pmem_phys_region *r_physregion; +}; + +/* One arena per NUMA-node */ +struct numa_info; +struct pmem_arena; + +struct pmem_mapping { + int dummy; /* TBD */ +}; + +typedef uint32_t pmem_metric_t; + + +/* Create arena. [start, end) describes the address range + * of the arena including all holes. + */ +struct pmem_arena * +pmem_arena_create(struct numa_info *ni, paddr_t start, paddr_t end); + +/* Set arena size incrementally. Only needed if you can't get the + * information in an ordered way. + */ +int +pmem_arena_loadrange(struct pmem_arena *arena, paddr_t start, paddr_t end, + pmem_type_t type); + +/* Add new memory module to arena. */ +int +pmem_arena_add_dimm(struct pmem_arena *arena, + paddr_t start, paddr_t end, pmem_type_t type, uint32_t serial); + +/* Load physical addresses [start, end) having the given default properties. + */ +int +pmem_region_create(paddr_t start, paddr_t end, + pmem_type_t type, pmem_prot_t prot, pmem_props_t props); + +/* Connect loaded physical addresses with this arena. */ +int +pmem_arena_add_regions(struct pmem_arena *arena); + +/* Load arena with physical addresses [start, end) having the given + * default properties. This basically does the same as + * pmem_region_create() and pmem_arena_add_regions() in one step, + * but can't be used in very early MD bootstrapping. + */ +int +pmem_arena_prime(struct pmem_arena *arena, paddr_t start, paddr_t end, + pmem_type_t type, pmem_prot_t prot, pmem_props_t props); + +/* Connect two arenas. */ +int +pmem_arena_connect(struct pmem_arena *left, struct pmem_arena *right, + struct pmem_mapping *m, pmem_metric_t metric); + +/* Reserve a region in arena that meets the given criteria. + * The region is returned with a reference count of at least 1. + */ +struct pmem_region * +pmem_alloc(struct pmem_arena *arena, paddr_t minaddr, paddr_t maxaddr, + pmem_prot_t prot, pmem_props_t props, pmem_type_t type, + size_t align, size_t phase, size_t size, size_t nocross, + vm_flag_t flags, pmem_metric_t maxmetric); + +int +pmem_free(struct pmem_region **r); + +/* Get/set properties on the region `r'. */ +int +pmem_get(struct pmem_region *r, pmem_prot_t *prot, pmem_props_t *props, + pmem_type_t *type); + +int +pmem_set(struct pmem_region *r, pmem_prot_t prot, pmem_props_t props, + pmem_type_t type); + +/* Count another reference to region `r'. */ +void +pmem_incref(struct pmem_region *r); + +/* Reduce the reference count on `r' by one. pmem_decref may reclaim the + * resources held by `r'. + */ +void +pmem_decref(struct pmem_region *r); + +/* Map region `r' into arena `a'. + * + * Returns NULL on failure. `paddr' is undefined on failure. + * + * On success, return `r' if region `r' belongs to arena `a', or else + * return an alias for region `r' in `a'. The returned region's reference + * count is increased by one. Set `paddr' to the physical address of + * the start of the region `r' in arena `a'. + */ +struct pmem_region * +pmem_map(struct pmem_arena *arena, struct pmem_region *r, paddr_t *paddr); + +/* Remove a mapping of `r' from its arena. Decrease the reference count + * by one. 
+ */ +void +pmem_unmap(struct pmem_region *r); + + +/* debug */ +void pmem_regions_dump(struct pmem_arena *arena); +void pmem_dimms_dump(struct pmem_arena *arena); +void pmem_arenas_dump(bool dump_regions, bool dump_dimms); + +#endif /* _SYS_PMEM_H_ */