// Copyright 1995 Barbara Liskov

// MM code that is specific to in-place initialization.

#include <stdio.h>
#include <string.h>
#include "common/arraysort.h"
#include "common/or_obj.h"
#include "common/fail.h"
#include "common/intarray.h"
#include "common/intset.h"

#include "or/or.h"
#include "or/or_config.h"
#include "or/thread.h"

#include "dformat.h"
#include "segtable.h"
#include "segment.h"
#include "mm.h"
#include "handle.h"
#include "itable.h"
#include "rtable.h"
#include "logrecord.h"

// Look up the object named by "oref" and return a newly allocated
// handle that refers to it.
//
// Returns 0 when "fast" is set and the segment is either not cached or
// still being read from disk, and also when the segment has no object
// to pin at the requested index.  Returns "retry" after the slow path
// has made room for (or waited on) the segment.
MM_Handle* MM::read_object(Oref oref, bool fast) {
    int segnum = OREF_SEGMENT(oref);
    int index  = OREF_INDEX(oref);

    // Non-blocking probe of the cache for the segment header.
    Segment* seg = find_segment(segnum, TRUE);

    if (seg == 0) {
	// The segment is not in the cache right now.
	if (fast) return 0;

	// Make space for the segment in the cache; the result of this
	// second lookup is not used here, the lookup is simply retried.
	find_segment(segnum, FALSE);
	return retry;
    }

    if (seg->missing()) {
	// The segment contents are still being read from the disk.
	if (fast) return 0;

	// Pinning waits for the read to finish; drop the pin again
	// right away if one was obtained and then retry.
	OR_obj* waited = seg->pin(index);
	if (waited != 0) seg->unpin();
	return retry;
    }

    // Segment is resident: pin the object itself.
    OR_obj* target = seg->pin(index);
    if (target == 0) return 0;

    // Package the pinned object in a handle.
    MM_Handle* handle = new MM_Handle;
    handle->oref_     = oref;
    handle->obj_      = target;
    handle->entry     = seg;
    handle->in_itable = FALSE;
    return handle;
}

// Return the cache entry for segment "segnum".
//
// With "fast" set, only the cache is consulted and 0 is returned on a
// miss.  Otherwise a cache entry is allocated for the segment's disk
// range and the lookup is repeated.  Returns 0 if the segment table
// has no range for "segnum".
Segment* MM::find_segment(int segnum, bool fast) {
    // The whole lookup is wrapped in a loop: parts of it end up
    // releasing "mm->mutex" while they wait, so everything has to be
    // redone from the top to avoid acting on stale reads.
    for (;;) {
	Segment* hit = (Segment*) cache->lookup(segnum);
	if ((hit != 0) || fast) return hit;

	// Locate the disk range occupied by the segment.
	Disk_Range where;
	if (!segtable->find(segnum, where)) return 0;

	// Allocate a cache entry unless one has appeared in the
	// meantime; either way the next iteration looks it up again.
	if (cache->lookup(segnum) == 0) {
	    cache->alloc(segnum,
			 where.address << DISK_UNIT_SHIFT,
			 where.count << DISK_UNIT_SHIFT);
	}
    }
}

// Build the cache entry (a Segment) for segment "id".  "address" and
// "size" are shifted right by DISK_UNIT_SHIFT to form the Disk_Range
// handed to the Segment constructor.
CacheEntry* MM::alloc_entry(int id, long address, long size) {
    Disk_Range extent;
    extent.address = address >> DISK_UNIT_SHIFT;
    extent.count   = size >> DISK_UNIT_SHIFT;

    Segment* fresh = new Segment(id, extent);
    return fresh;
}

// Report the free space as tracked by the segment table.
int MM::free_space() {
    int available = segtable->free_space();
    return available;
}

// Install "obj" at "oref" inside its segment.  If the segment refuses
// the install, fall back to forwarding the object elsewhere.
void MM::install_object(Oref oref, OR_obj* obj) {
    int segnum = OREF_SEGMENT(oref);
    int index  = OREF_INDEX(oref);

    Segment* segment = find_segment(segnum);
    assert(segment != 0);

    // Common case: the object is installed in place.
    if (segment->install(index, obj)) return;

    forward_object(oref, obj);
}

// Move "obj" to newly allocated space and leave a forwarder at "oref".
// The function currently starts by calling th_fail(), i.e. moving
// grown objects is not supported yet.
void MM::forward_object(Oref oref, OR_obj* obj) {
    th_fail("cannot handle growing objects that need to be moved");

    // XXX The code below still has to be fixed so that it creates a
    // log record for the forwarder object.

    // The object does not fit where it is; reserve room elsewhere.
    Oref new_oref;
    bool reserved = alloc_space(OR_obj_full_size(obj), new_oref);
    if (!reserved)
	th_fail("could not install committed object even with forwarding");

    // Put the object into the segment that holds the reservation.
    Segment* new_segment = find_segment(OREF_SEGMENT(new_oref));
    assert(new_segment != 0);
    if (new_segment->install(OREF_INDEX(new_oref), obj)) {
	// Installed; nothing more to do for the new copy.
    } else {
	th_fail("could not install reserved new object");
    }

    // Leave a forwarder behind in the old segment.  The old segment
    // is looked up again because it may have disappeared from the
    // cache while space was being allocated.
    Segment* segment = find_segment(OREF_SEGMENT(oref));
    assert(segment != 0);
    segment->install_forwarder(OREF_INDEX(oref), new_oref);

    // XXX Somewhere it should be recorded that this transaction
    // depends on "new_segment", so that the log record(s) are not
    // deleted before "new_segment" has been written out.
}

void MM::clean_log() {
    Log_Index low, high;
    IntArray segments;

    // We get the set of segments that should be written out, sort these
    // segments for good disk scheduling, and then write out the segments.
    mutex->grab(); {
	collect_segments(segments, low, high);
	sort_segments(segments);
	write_modifications(segments);
    } mutex->release();

    // Clear out the log records
    or->log->applied(low, high);
}

#undef SHOW_PURGING

// Scan the log from its low end and gather the segments modified by
// the scanned records.
//
// On return "segments" holds the collected segment numbers, "low" is
// the log's low index and "high" is the index of the last record that
// was scanned ("low - 1" if none was).  Scanning stops once the
// remaining log size is no larger than the log's target size, or at
// the first record that is missing or not yet installed.
void MM::collect_segments(IntArray& segments, Log_Index& low, Log_Index& high){
    Log* log = or->log;

    // Size bookkeeping that decides when the scan may stop.
    int remaining = log->current_size();
    int goal = log->target_size();

    low = log->low();
    high = low - 1;

#ifdef SHOW_PURGING
    // Measure log record absorption
    int scanned = 0;
    int purged = 0;
#endif

    IntSet touched;
    while (remaining > goal) {
	// Try to take the segments from the record right after "high".
	int next = high + 1;
	Log_Record* rec = log->fetch(next);

	// Stop at a record that has been removed from the log ...
	if (rec == 0) break;
	// ... or that has not been installed yet.
	if (!log->is_installed(next)) break;

	high = next;
	rec->get_modified_segments(&touched);
	remaining -= rec->size();

#ifdef SHOW_PURGING
	scanned++;
	if (rec->absorbed()) purged++;
#endif
    }

#ifdef SHOW_PURGING
    fprintf(stderr, "cleaned: %3d records with %3d segments\n",
	    scanned, touched.size());
    fprintf(stderr, "purged: %3d out of %3d records\n", purged, scanned);
    fprintf(stderr, "absorbed: %5d out of %5d mods\n", absorbed, mods);
    absorbed = 0;
    mods = 0;
#endif

    // Copy the collected segment numbers into the output array.
    segments.clear();

    int segnum;
    IntSet::Elements iter = &touched;
    while (iter.get(segnum)) {
	segments.append(segnum);
    }
}

// Sorting routine for the disk scheduler: orders two segment numbers
// by the disk address at which their segments start.
// Assumes that the caller holds "mm->mutex".
//
// "p1" and "p2" point at the two segment numbers (ints) to compare.
// Returns a negative value, zero, or a positive value when the first
// segment starts before, at, or after the second one.  Fails via
// th_fail() if either segment has no entry in the segment table.
static int sort_by_location(void const* p1, void const* p2) {
    int s1 = *((int const*) p1);
    int s2 = *((int const*) p2);

    Disk_Range r1, r2;
    if (! or->mm->segtable->find(s1, r1))
	th_fail("could not find disk range for segment");
    if (! or->mm->segtable->find(s2, r2))
	th_fail("could not find disk range for segment");

    // Compare explicitly rather than returning "r1.address - r2.address":
    // the difference of two disk addresses need not fit in an int (or
    // may be computed in an unsigned type), which would yield the
    // wrong sign and hence a wrong ordering.
    if (r1.address < r2.address) return -1;
    if (r1.address > r2.address) return 1;
    return 0;
}

// Reorder "segments" using the sort_by_location comparison.
void MM::sort_segments(IntArray& segments)
{
    // XXX Rotational scheduling will probably do better
    ArraySort(segments, sort_by_location);
}

// Apply the pending modifications of every segment listed in
// "segments" and write each of those segments out.
//
// For each segment, in array order: fetch its pending modifications
// from the itable, install every modified object, drop the
// modification from the itable, and finally write the segment.
// "installs" is increased by the number of modifications installed.
void MM::write_modifications(IntArray const& segments) {
    // Array used to hold pending mods for a segment (reused per segment).
    Itable_Mods mods;

    int segcount = segments.size();
    for (int i = 0; i < segcount; i++) {
	int segnum = segments[i];

	// Extract pending modification array
	mods.clear();
	itable->get_modifications(segnum, &mods);

	// Install the pending modifications and clean up.  A separate
	// index name is used so that the outer loop index is not shadowed.
	int modcount = mods.size();
	for (int j = 0; j < modcount; j++) {
	    Itable_Mod* m = mods[j];
	    Oref oref = m->oref();
	    OR_obj* obj = m->object();
	    install_object(oref, obj);
	    itable->remove(m);
	    m->unref();
	}

	// Now write out the segment.  find_segment() returns 0 when the
	// segment table has no range for the segment, so check before
	// dereferencing, as the other callers in this file do.
	Segment* seg = find_segment(segnum);
	assert(seg != 0);
	seg->write();
	installs += modcount;
    }
}

// Reserve room for a new object of "slots" slots and store the
// reserved location in "oref".  Returns TRUE on success; on failure a
// warning is issued and FALSE is returned.
//
// Objects of at least "big_threshold" bytes get a segment of their
// own; smaller ones are placed in the current arena segment
// ("new_arena"), with a fresh arena created when the current one
// cannot take the object.
bool MM::alloc_space(int slots, Oref& oref) {
    if (slots >= DISK_SEG_MAX_INDEX) {
	warn("object is too big for this OR: %d slots", slots);
	return FALSE;
    }

    int nbytes = slots * sizeof(OR_slot);
    if (nbytes >= big_threshold) {
	// Big object: ask for at least half a block more than needed
	// so that the various segment overheads fit as well.
	int blocks = (nbytes + DISK_UNIT + DISK_UNIT/2) >> DISK_UNIT_SHIFT;
	int seg;
	if (segtable->add(seg, blocks)) {
	    if (rtable->reserve_new(seg, slots, oref))
		return TRUE;

	    warn("could not reserve space for big object");
	    return FALSE;
	}

	warn("could not allocate space for a big object segment");
	return FALSE;
    }

    // Small object: first try the arena that is already in use.
    if ((new_arena >= 0) && rtable->reserve_new(new_arena, slots, oref))
	return TRUE;

    // No luck; set up a new arena segment ...
    if (!segtable->add(new_arena, super->segprefsize)) {
	warn("could not allocate a new segment");
	return FALSE;
    }

    // ... and try the reservation there.
    if (!rtable->reserve_new(new_arena, slots, oref)) {
	warn("failed to allocate object in new arena");
	return FALSE;
    }

    return TRUE;
}

// Resizing is not available with this policy: "p" is ignored and a
// diagnostic is printed on stderr.
void MM::resize_dspace(int p) {
    fputs("cannot change disk utilization for in-place policy\n", stderr);
}

// Initialization and recovery code

static Disk_Segment* make_seg(int id, Disk_Range, int num);
// requires	Segment occupying range can store object with "num" total slots
// effects	Create a disk segment with a single object big enough to
//		have "num" total slots ("num" includes header).
//		"id" is recorded as the segment's id.  The result is a
//		newly allocated in-memory image sized to cover the range;
//		the caller is responsible for releasing it.

// Create an empty on-disk segment table: one root segment that records
// the disk range of every leaf segment, plus the leaf segments
// themselves with all of their entries marked unmapped.  The root
// range is stored in "super->segtable".  Once everything has been
// written, the table is loaded via recover_seg_table().
void MM::create_seg_table() {
    // Find maximum number of segments we may need to map
    // XXX We assume that all segments are at least "segprefsize/2" big
    int max_segs = 2 * (super->size / super->segprefsize);

    // Find number of entries per leaf segment.  Leave a kilobyte or
    // so free in each leaf segment to account for various overheads.
    int segsize = super->segprefsize << DISK_UNIT_SHIFT;

    int segs_per_leaf = (segsize - 1024) / sizeof(OR_slot);
    int num_leaves = (max_segs + segs_per_leaf - 1) / segs_per_leaf;
    int blks_per_leaf = super->segprefsize;
    int lslots = (sizeof(Disk_LeafObj) / sizeof(OR_slot)) + segs_per_leaf - 1;

    // Assign disk range for root segment.
    // (Allow kilobyte for overhead again.)
    int root_seg_size = (sizeof(OR_slot)*num_leaves + 1024) >> DISK_UNIT_SHIFT;
    if (root_seg_size < super->segprefsize)
	root_seg_size = super->segprefsize;

    // The root segment is placed right after DISK_SB_2.
    Disk_Range root_range;
    root_range.address  = DISK_SB_2+1;
    root_range.count    = root_seg_size;
    super->segtable	= root_range;

    // Now allocate root segment
    // NOTE(review): the root object is sized from "segs_per_leaf" even
    // though the loop below fills "num_leaves" entries of r->ranges.
    // This looks safe only while num_leaves <= segs_per_leaf -- confirm
    // whether "num_leaves" was intended here.
    int rslots = ((sizeof(Disk_RootObj)/sizeof(OR_slot)) + segs_per_leaf - 1);
    Disk_Segment* r_seg = make_seg(0, root_range, rslots);
    Disk_RootObj* r = (Disk_RootObj*) (((OR_slot*) r_seg) + r_seg->header[0]);

    r->max_segments	= max_segs;
    r->num_leaves	= num_leaves;
    r->segs_per_leaf	= segs_per_leaf;

    // Now initialize the leaf entries in the root segment

    // Next available disk address
    Disk_Address avail = root_range.address + root_range.count;

    // Next segment number to be mapped (skip root and leaves)
    int segnum = num_leaves + 1;

    for (int i = 0; i < num_leaves; i++) {
	Disk_Range leaf_range;
	leaf_range.address = avail;
	leaf_range.count   = blks_per_leaf;

	// Record where this leaf lives in the root object.
	r->ranges[i].value32[0] = leaf_range.address;
	r->ranges[i].value32[1] = leaf_range.count;

	// Leaf segments are numbered 1..num_leaves (the root is 0).
	Disk_Segment* l_seg = make_seg(i+1, leaf_range, lslots);
	Disk_LeafObj* l = (Disk_LeafObj*) (((OR_slot*)l_seg)+l_seg->header[0]);

	l->first = segnum;
	l->count = segs_per_leaf;
	for (int j = 0; j < segs_per_leaf; j++) {
	    // Mark unmapped entries with zero ranges
	    l->ranges[j].value32[0] = 0;
	    l->ranges[j].value32[1] = 0;
	}

	mutex->grab(); {
	    if (! disk->write(l_seg, leaf_range)) sysfail(or->config->disk);
	} mutex->release();

	// make_seg() allocates the image with "new OR_slot[...]", so it
	// has to be released through an OR_slot pointer; an array delete
	// through a pointer of a different type is undefined behaviour.
	delete [] (OR_slot*) l_seg;

	avail += blks_per_leaf;
	segnum += segs_per_leaf;
    }

    // The root is written last, after all of its leaf ranges are set.
    mutex->grab(); {
	if (! disk->write(r_seg, root_range)) sysfail(or->config->disk);
    } mutex->release();

    // Release through the type used for the allocation (see above).
    delete [] (OR_slot*) r_seg;

    // Also need to read in the segment table content
    recover_seg_table();
}

// Build the in-memory image of a special disk segment that covers
// "range" and contains exactly one object of "num" total slots
// (object header included), placed at the end of the segment.
//
// "id" is stored as the segment id.  The image is allocated with
// "new OR_slot[...]" and starts out zero-filled; the caller owns it.
// The object's class is set to 0, its size to "num - OR_obj_headers"
// and its bitfield to OBJ_BF_ALLDATA; the object's fields themselves
// are left for the caller to fill in.
static Disk_Segment* make_seg(int id, Disk_Range range, int num) {
    int slots = (range.count << DISK_UNIT_SHIFT) / sizeof(OR_slot);

    // Object will require "num" slots + one entry in the offset table.
    assert(num + 1 <= slots);

    // Zero the whole image first: callers write the complete segment
    // to disk, so any slot that is never assigned would otherwise
    // carry indeterminate heap contents into the disk image.
    OR_slot* image = new OR_slot[slots];
    memset((void*) image, 0, slots * sizeof(OR_slot));

    Disk_Segment* contents = (Disk_Segment*) image;

    contents->magic	= DISK_SEG_MAGIC;
    contents->id	= id;
    contents->type	= DISK_SEG_SPECIAL;
    contents->slots	= slots;
    contents->count	= 0;
    contents->objects	= slots;
    contents->size	= 0;

    // Allocate object: it takes the last "num" slots of the segment,
    // and entry 0 of the offset table records where it starts.
    contents->count++;
    contents->size += num;
    contents->objects -= num;
    contents->header[0] = contents->objects;

    // Initialize object header...
    OR_obj* obj = (OR_obj*) (((OR_slot*) contents) + contents->header[0]);
    OR_OBJ_CLASS(obj)		= 0;
    OR_OBJ_SIZE(obj)		= num - OR_obj_headers;
    OR_OBJ_BITFIELD(obj)	= OBJ_BF_ALLDATA;
    return contents;
}

void MM::recover_seg_table() {
    segtable = new Segment_Table;
    segtable->recover();
}
