// Copyright 1995 Barbara Liskov

// \section{Segment Table Implementation}

#include <iostream.h>

#include "utils/basic.h"
#include "utils/device.h"
#include "utils/intset.h"
#include "utils/th_assert.h"
#include "utils/Timer.h"

#include "common/or_obj.h"

#include "or/or.h"
#include "or/thread.h"

#include "scache.h"
#include "disk.h"
#include "dformat.h"
#include "handle.h"
#include "log.h"
#include "logrecord.h"
#include "mm.h"
#include "segment.h"
#include "segtable.h"
#include "config/vdefs/REPLICATION.h"

Segment_Table::Segment_Table() {
    // Start out empty: no root or leaves are attached and nothing is
    // recorded as allocated until recover() fills these fields in.
    blk_max = 0;
    last    = -1;
    leaf    = 0;
    root    = 0;
}

void Segment_Table::recover() {
    // Pin the root
    Oref oref = Oref_screate(0,0);
    MM_Handle* h = orx->mm->fetch(oref);
    th_assert(h != 0, "could not read segment table root");
    root = (Disk_RootObj*) h->obj();
    // Never unpin the object

    // Pin all leaves
    leaf = new Disk_LeafObj*[root->num_leaves];
    for (Uint i = 0; i < root->num_leaves; i++) {
	oref = Oref_screate(i + 1, 0);
	h = orx->mm->fetch(oref);
	th_assert(h != 0, "could not read segment table leaf");
	leaf[i] = (Disk_LeafObj*) h->obj();
	// Never unpin the object
    }

    recompute_allocation_info();
}

void Segment_Table::recompute_allocation_info() {
    int num_leaves = root->num_leaves;
    int segs_per_leaf = root->segs_per_leaf;

    // Root is segment 0, leaves are 1..num_leaves.
    last = num_leaves;
    Disk_Range last_leaf = root->ranges[last-1];
    blk_max = last_leaf.address + last_leaf.count - 1;

    // Now scan all leaves
    for (int i = 0; i < num_leaves; i++) {
	assert(leaf[i]->first == (Uint)(1 + num_leaves + (i * segs_per_leaf)));
	for (int j = 0; j < segs_per_leaf; j++) {
	    Disk_Range range = leaf[i]->ranges[j];

	    if (range.count == 0) {
		// Not allocated this segment
		continue;
	    }

	    int segnum = leaf[i]->first + j;
	    if (last < segnum) last = segnum;

	    Disk_Address end = range.address + range.count - 1;
	    if (blk_max < end) blk_max = end;
	}
    }
}

bool Segment_Table::find(int segid, Disk_Range& range) const {
    if (segid == 0) {
	// Root
	range = orx->mm->super->segtable;
	return TRUE;
    }

    if ((Uint) segid <= root->num_leaves) {
	// Leaf
	range.address = root->ranges[segid-1].address;
	range.count   = root->ranges[segid-1].count;
	return TRUE;
    }

    // Data segment
    Uint leaf_num = (segid - root->num_leaves - 1) / root->segs_per_leaf;
    Uint offset   = (segid - root->num_leaves - 1) % root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);
    assert(offset + leaf[leaf_num]->first == (Uint)segid);

    if (leaf[leaf_num]->ranges[offset].count == 0)
	return FALSE;

    range.address  = leaf[leaf_num]->ranges[offset].address;
    range.count    = leaf[leaf_num]->ranges[offset].count;
    return TRUE;
}


bool Segment_Table::add(int& newid, int blocks) {
    // Create a new segment of "blocks" blocks: allocate an id and disk
    // range, enter an initialized segment in the cache, and log the
    // allocation.  On success stores the id in "newid" and returns TRUE;
    // returns FALSE if alloc() could not satisfy the request.
    // NOTE(review): mm->mutex is released and re-acquired below, so the
    // caller presumably holds it on entry -- confirm.
    int segid;
    Disk_Range range;
    if (!alloc(segid, range, blocks))
        return FALSE;

    // Initialize the segment and make it visible through the cache
    Segment* seg = new Segment(segid, range);
    seg->init();
    orx->mm->cache->enter(segid, seg);

    // XXX _There is no need to flush the log record because
    // it will get flushed with transaction records anyway.
    Log_Record* rec = new Seg_Alloc_Record(segid, range);

    // Release mm->mutex before invoking log operations
    // since these may block on logspace, causing deadlock.
    orx->mm->mutex->release();
    Log_Index index = orx->log->append(rec, FALSE);
    orx->log->installed(index);
    orx->mm->mutex->grab();

    newid = segid;
    return TRUE;
}

int Segment_Table::free_space() {
    // Space is only ever handed out past blk_max, so everything between
    // blk_max and the end of the disk counts as free.  The block count is
    // scaled by DISK_UNIT_SHIFT (presumably converting blocks to bytes).
    int unused_blocks = orx->mm->super->size - blk_max - 1;
    return unused_blocks << DISK_UNIT_SHIFT;
}

// XXX This is only going to get called at the backup or on
//     recovery.
void Segment_Table::install_mapping(int segid, Disk_Range range) {
    // Check that segment table after allocation will still fit in
    // its assigned disk region
    th_assert((Uint)segid > root->num_leaves,
	      "modifying special segment location");
    th_assert((Uint) segid < root->max_segments,
	      "modifying out of range segment");

    Uint leaf_num = (segid - root->num_leaves - 1) / root->segs_per_leaf;
    Uint offset   = (segid - root->num_leaves - 1) % root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);
    assert(offset + leaf[leaf_num]->first == (Uint)segid);

    leaf[leaf_num]->ranges[offset].address = range.address;
    leaf[leaf_num]->ranges[offset].count = range.count;
}

int Segment_Table::container(int segid) {
    th_assert(segid != 0, "no container for root segment");
    if ((Uint)segid <= root->num_leaves)
	// Leaf segment location is kept in root segment
	return 0;

    Uint leaf_num = (segid - root->num_leaves - 1) / root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);

    return leaf_num+1;
}

    
bool Segment_Table::alloc(int& s, Disk_Range& range, int blocks) {
    // Pick an unused data segment id and reserve "blocks" disk blocks
    // for it at the end of the space currently in use.  On success the
    // mapping is written into the covering leaf, the leaf's segment is
    // marked dirty, "s" and "range" are filled in, and TRUE is returned.
    // Returns FALSE, changing nothing, when the request is empty or
    // negative, when no id is free, or when the disk has no room.

    // A zero count is how the table marks an unallocated entry, so an
    // empty segment would still look free to find() and its id would be
    // handed out again.  A negative size would move blk_max backwards.
    if (blocks <= 0)
        return FALSE;

    // Allocate segment id
    // Start searching after last allocated id
    int id = -1;
    Uint i;
    for (i = last+1; i < root->max_segments; i++) {
        Disk_Range junk;
        if (! find(i, junk)) { id = i; break; }
    }
    if (id < 0) {
        // Perform a full search over all data segment ids
        for (i = root->num_leaves+1; i < root->max_segments; i++) {
            Disk_Range junk;
            if (! find(i, junk)) { id = i; break; }
        }
    }
    if (id < 0) {
        // Every id is in use
        return FALSE;
    }

    // XXX _Since we are not currently freeing segment storage,
    // we allocate space at the end of the current space._

    Disk_Address start = blk_max + 1;
    if (start + blocks > orx->mm->super->size)
        // Not enough space for new segment
        return FALSE;

    // Allocate space
    Uint leaf_num = (id - root->num_leaves - 1) / root->segs_per_leaf;
    Uint offset   = (id - root->num_leaves - 1) % root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);
    assert(offset + leaf[leaf_num]->first == (Uint)id);

    range.address = start;
    range.count   = blocks;
    leaf[leaf_num]->ranges[offset].address = range.address;
    leaf[leaf_num]->ranges[offset].count = range.count;

    // Get leaf segment (leaf k is cached as segment k+1) and flag it as
    // changed
    Segment* leaf_seg = (Segment*) orx->mm->cache->lookup(leaf_num+1);
    th_assert(leaf_seg != 0, "segment table leaf is not cached");
    leaf_seg->mark_dirty();

    // Updated cached info to speed up allocation
    blk_max = range.address + range.count - 1;
    last    = id;
    s = id;
    return TRUE;
}

void Segment_Table::print(FILE *fp) {
    // Write a readable dump of the segment table to "fp", or to stderr
    // when "fp" is null.  Every value printed with %d is converted to
    // int explicitly so that the argument type matches the conversion
    // whatever the underlying typedefs are.
    if (!fp) fp = stderr;
    fprintf(fp, "Segment Table:\n");
    fprintf(fp, "Last id alloc = %d, Max block alloc = %d\n",
            (int) last, (int) blk_max);
    fprintf(fp, "Root: Max DB segs = %d, Num leaves = %d, Segs per leaf = %d\n",
            (int) root->max_segments, (int) root->num_leaves,
            (int) root->segs_per_leaf);
    for (Uint i = 0; i < root->num_leaves; i++) {
        fprintf(fp, "Leaf: %d, Address  --- ", (int) i);
        root->ranges[i].print(fp);
        assert(leaf[i]->first == (Uint)(1 + root->num_leaves
                                        + (i * root->segs_per_leaf)));
        fprintf(fp, ", First mapped seg = %d, Count = %d\n",
                (int) leaf[i]->first, (int) leaf[i]->count);
        for (Uint j = 0; j < root->segs_per_leaf; j++) {
            Disk_Range range = leaf[i]->ranges[j];
            if (range.count) {
                // Only allocated entries are listed.  The number shown
                // is the index within this leaf, not the segment id.
                fprintf(fp, "[Seg %d", (int) j);
                range.print(fp);
                fprintf(fp, "] ");
            }
        }
        fprintf(fp, "\n");
    }
}

// \subsection{Log Record Operations}
Seg_Alloc_Record::Seg_Alloc_Record(int seg, Disk_Range r)
    : Log_Record(Tid())
{
    // Describe the allocation of segment "seg" at disk range "r", and
    // take a fresh stamp from the MM at creation time.
    range   = r;
    segment = seg;
    stamp   = orx->mm->new_stamp();
}

Seg_Alloc_Record::Seg_Alloc_Record()
    : Log_Record(Tid())
{
    // The fields are left unset here: decode() fills in the segment and
    // range, and install() assigns the stamp.
}


Seg_Alloc_Record::~Seg_Alloc_Record()
{
    // Nothing to clean up here
}

int Seg_Alloc_Record::size() {
    // Size of the base record plus the two fields that encode() writes;
    // the stamp is not counted.
    int payload = sizeof(segment) + sizeof(range);
    return sizeof(Log_Record) + payload;
}

#if REPLICATION
int Seg_Alloc_Record::type() {
    // The record type is the base class's type offset by this record's
    // own type code.
    int base = Log_Record::type();
    return base + Seg_Alloc_Record_Type;
}
#endif

bool Seg_Alloc_Record::encode(Device* dev) {
    // Write the record to "dev": the tid, then the segment id, then the
    // address and count of the range.  Stops at the first failed send
    // and returns FALSE.
    // Do not encode the time stamp, it will be re-generated at recover time.
    if (! tid().encode(dev))
        return FALSE;
    if (! dev->send_ubits32(segment, TRUE))
        return FALSE;
    if (! dev->send_ubits32(range.address, TRUE))
        return FALSE;
    if (! dev->send_ubits32(range.count, TRUE))
        return FALSE;
    return TRUE;
}

bool Seg_Alloc_Record::decode(Device* dev) {
    // Read back what encode() wrote: the tid, then three 32-bit values
    // (segment id, range address, range count).  Returns FALSE, leaving
    // segment and range untouched, if either read fails.
    // NOTE(review): encode() writes the three values one at a time with
    // send_ubits32, whereas this reads them as one raw buffer.  That is
    // presumably the same layout on the wire -- confirm against Device.
    struct {Ubits32 seg, addr, count;} wire;

    if (! tid().decode(dev))
        return FALSE;
    if (! dev->recv_buffer(&wire, sizeof(wire)))
        return FALSE;

    segment       = wire.seg;
    range.address = wire.addr;
    range.count   = wire.count;
    return TRUE;
}

void Seg_Alloc_Record::install() {
    // Apply this record to the segment table.  Both the table update and
    // the stamp assignment happen while mm->mutex is held.
    orx->mm->mutex->grab();
    orx->mm->segtable->install_mapping(segment, range);
    // Take a fresh stamp at installation time; get_modified_segments()
    // later checks the containing segment against it.
    stamp = orx->mm->new_stamp();
    orx->mm->mutex->release();
}

void Seg_Alloc_Record::get_modified_segments(IntSet* set) {
    // Add to "set" the ids of the segments that must be written out
    // because of this record.

    // The allocated segment has to be written out
    set->insert(segment);

    // A segment table page may also have to be written out.  Look up its
    // id once and use it both for the cache lookup and for the insert.
    int container_num = orx->mm->segtable->container(segment);

    // Locate the segment in the cache
    Segment* container = (Segment*) orx->mm->cache->lookup(container_num);
    th_assert(container != 0, "segment table contents are not cached");

    if (container->modified(stamp)) {
        // Container has pending modifications that happened on or before
        // the installation of this record into the container.  Therefore
        // we need to write out the container.
        set->insert(container_num);
    }
}
