// Copyright 1995 Barbara Liskov

// \section{Segment Table Implementation}

#include "common/basic.h"
#include "common/device.h"
#include "common/or_obj.h"
#include "common/intset.h"
#include "common/th_assert.h"

#include "or/or.h"
#include "or/thread.h"

#include "scache.h"
#include "disk.h"
#include "dformat.h"
#include "handle.h"
#include "log.h"
#include "logrecord.h"
#include "mm.h"
#include "segment.h"
#include "segtable.h"

// Build a table in its uninitialized state: no root or leaf objects are
// attached yet ("recover" is what attaches them), no segment id has been
// handed out ("last" is -1) and the highest used block is reset to 0.
Segment_Table::Segment_Table()
    : root(0),
      leaf(0),
      last(-1),
      blk_max(0)
{
}

void Segment_Table::recover() {
    // Pin the root
    Oref oref;
    OREF_SET(oref,0,0);
    MM_Handle* h = or->mm->fetch(oref);
    th_assert(h != 0, "could not read segment table root");
    root = (Disk_RootObj*) h->obj();
    // Never unpin the object

    // Pin all leaves
    leaf = new Disk_LeafObj*[root->num_leaves];
    for (int i = 0; i < root->num_leaves; i++) {
	OREF_SET(oref,i+1,0);
	h = or->mm->fetch(oref);
	th_assert(h != 0, "could not read segment table leaf");
	leaf[i] = (Disk_LeafObj*) h->obj();
	// Never unpin the object
    }

    recompute_allocation_info();
}

void Segment_Table::recompute_allocation_info() {
    int num_leaves = root->num_leaves;
    int segs_per_leaf = root->segs_per_leaf;

    // Root is segment 0, leaves are 1..num_leaves.
    last = num_leaves;
    OR_slot last_leaf = root->ranges[last-1];
    blk_max = last_leaf.value32[0] + last_leaf.value32[1] - 1;

    // Now scan all leaves
    for (int i = 0; i < num_leaves; i++) {
	assert(leaf[i]->first == 1 + num_leaves + (i * segs_per_leaf));
	for (int j = 0; j < segs_per_leaf; j++) {
	    OR_slot range = leaf[i]->ranges[j];

	    if (range.value32[1] == 0) {
		// Not allocated this segment
		continue;
	    }

	    int segnum = leaf[i]->first + j;
	    if (last < segnum) last = segnum;

	    Disk_Address end = range.value32[0] + range.value32[1] - 1;
	    if (blk_max < end) blk_max = end;
	}
    }
}

bool Segment_Table::find(int segid, Disk_Range& range) const {
    if (segid == 0) {
	// Root
	range = or->mm->super->segtable;
	return TRUE;
    }

    if (segid <= root->num_leaves) {
	// Leaf
	range.address = root->ranges[segid-1].value32[0];
	range.count   = root->ranges[segid-1].value32[1];
	return TRUE;
    }

    // Data segment
    int leaf_num = (segid - root->num_leaves - 1) / root->segs_per_leaf;
    int offset   = (segid - root->num_leaves - 1) % root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);
    assert(offset + leaf[leaf_num]->first == segid);

    if (leaf[leaf_num]->ranges[offset].value32[1] == 0)
	return FALSE;

    range.address  = leaf[leaf_num]->ranges[offset].value32[0];
    range.count    = leaf[leaf_num]->ranges[offset].value32[1];
    return TRUE;
}

// \subsection{Log Record for Segment Allocation}
// Log record for the allocation of a single segment.  It carries the
// number of the new segment and the disk range assigned to it; installing
// the record writes that mapping into the segment table.
class Seg_Alloc_Record : public Log_Record {
  public:
    Seg_Alloc_Record(int seg, Disk_Range range);
    // requires	"mm->mutex" is held by the caller
    // effects	Creates a record that maps segment "seg" to "range" and
    //		takes a fresh stamp from "mm" for it.

    virtual ~Seg_Alloc_Record();

    virtual int  size();
    // effects	Returns sizeof(Log_Record) plus the sizes of the
    //		"segment" and "range" members.

    virtual bool encode(Device*);
    // effects	Sends the tid, the segment number and the range
    //		(address, then count) to the device.  The stamp is
    //		not sent.

    virtual bool decode(Device*);
    // effects	Reads back the fields written by "encode".  Returns
    //		FALSE if any of them cannot be read.

    virtual void install();
    // effects	Grabs "mm->mutex", installs the mapping into the
    //		segment table, takes a new stamp, releases the mutex.

    virtual void get_modified_segments(IntSet*);
    // effects	Inserts the allocated segment into the set, and also the
    //		segment-table segment that holds its mapping if that
    //		segment reports pending modifications for "stamp".
  private:
    int		segment;	// Segment number
    Disk_Range	range;		// Disk range assigned to segment
    long	stamp;		// Time stamp of modification to seg table
				// (set at construction and again by install)
};

// Create a new segment occupying "blocks" disk blocks.  On success the
// allocation is logged, a freshly initialized Segment is entered into the
// cache, the new id is stored in "newid" and TRUE is returned.  If "alloc"
// cannot find an id or disk space, FALSE is returned and "newid" is left
// untouched.
bool Segment_Table::add(int& newid, int blocks) {
    int segid;
    Disk_Range range;

    if (! alloc(segid, range, blocks))
        return FALSE;

    // Log the allocation.
    // XXX _There is no need to flush the log record because
    // it will get flushed with transaction records anyway.
    // "alloc" has already written the mapping into the leaf, so the
    // record is reported to the log as installed straight away.
    Log_Record* rec = new Seg_Alloc_Record(segid, range);
    Log_Index index = or->log->append(rec, FALSE);
    or->log->installed(index);

    // Build the empty segment and hand it to the cache.
    Segment* seg = new Segment(segid, range);
    seg->init();
    or->mm->cache->enter(segid, seg);

    newid = segid;
    return TRUE;
}

// Amount of disk space that has not been handed out yet.  Space is only
// ever allocated past "blk_max", so the free blocks are the ones between
// "blk_max" and the end of the disk ("or->mm->super->size").  The block
// count is scaled by DISK_UNIT_SHIFT before being returned (presumably
// converting blocks to bytes -- confirm against the disk format).
int Segment_Table::free_space() {
    const int unused_blocks = or->mm->super->size - blk_max - 1;
    return unused_blocks << DISK_UNIT_SHIFT;
}

// XXX This is only going to get called at the backup or on
//     recovery.
void Segment_Table::install_mapping(int segid, Disk_Range range) {
    // Check that segment table after allocation will still fit in
    // its assigned disk region
    th_assert(segid > root->num_leaves, "modifying special segment location");
    th_assert(segid < root->max_segments, "modifying out of range segment");

    int leaf_num = (segid - root->num_leaves - 1) / root->segs_per_leaf;
    int offset   = (segid - root->num_leaves - 1) % root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);
    assert(offset + leaf[leaf_num]->first == segid);

    leaf[leaf_num]->ranges[offset].value32[0] = range.address;
    leaf[leaf_num]->ranges[offset].value32[1] = range.count;
}

// Return the number of the segment-table segment that stores the location
// of "segid": segment 0 (the root) for a leaf, or the owning leaf's segment
// number for a data segment.  The root itself has no container, so
// "segid" must not be 0.
int Segment_Table::container(int segid) {
    th_assert(segid != 0, "no container for root segment");

    if (segid > root->num_leaves) {
        // Data segment: leaf number "k" lives in segment "k+1".
        int which_leaf = (segid - root->num_leaves - 1) / root->segs_per_leaf;
        assert(which_leaf < root->num_leaves);
        return which_leaf + 1;
    }

    // Leaf segment location is kept in root segment
    return 0;
}
    
// Pick an unused segment id and assign it "blocks" disk blocks.
//
// The id search first tries the ids after "last" (the most recently
// allocated id) and, failing that, scans every data-segment id.  Disk
// space is always taken from just past "blk_max".
//
// On success the mapping is written into the owning leaf, that leaf's
// segment is marked dirty, the cached hints are advanced, "s" and "range"
// are filled in and TRUE is returned.  FALSE is returned, with nothing
// modified, if "blocks" is not positive, if no id is free, or if the disk
// does not have "blocks" blocks left.
bool Segment_Table::alloc(int& s, Disk_Range& range, int blocks) {
    // A block count of zero is how the table marks a free slot, so an
    // empty (or negative) allocation could never be recorded correctly.
    if (blocks <= 0) return FALSE;

    // Allocate segment id
    // Start searching after last allocated id
    //
    // The index is declared once, outside both loops: a name declared in
    // a for-init-statement is not visible after that loop in ISO C++.
    int id = -1;
    int i;
    for (i = last+1; i < root->max_segments; i++) {
        Disk_Range junk;
        if (! find(i, junk)) { id = i; break; }
    }
    if (id < 0) {
        // Perform a full search
        for (i = root->num_leaves+1; i < root->max_segments; i++) {
            Disk_Range junk;
            if (! find(i, junk)) { id = i; break; }
        }
    }
    if (id < 0) return FALSE;

    // XXX _Since we are not currently freeing segment storage,
    // we allocate space at the end of the current space._

    Disk_Address start = blk_max + 1;
    if (start + blocks > or->mm->super->size)
        // Not enough space for new segment
        return FALSE;

    // Allocate space
    int leaf_num = (id - root->num_leaves - 1) / root->segs_per_leaf;
    int offset   = (id - root->num_leaves - 1) % root->segs_per_leaf;
    assert(leaf_num < root->num_leaves);
    assert(offset + leaf[leaf_num]->first == id);

    range.address = start;
    range.count   = blocks;
    leaf[leaf_num]->ranges[offset].value32[0] = range.address;
    leaf[leaf_num]->ranges[offset].value32[1] = range.count;

    // Get leaf segment (leaf number "k" lives in segment "k+1") and flag
    // it as modified.
    Segment* leaf_seg = (Segment*) or->mm->cache->lookup(leaf_num+1);
    th_assert(leaf_seg != 0, "segment table leaf is not cached");
    leaf_seg->mark_dirty();

    // Update cached info to speed up allocation
    blk_max = range.address + range.count - 1;
    last    = id;

    s = id;
    return TRUE;
}

// \subsection{Log Record Operations}
// Build the record for the allocation of segment "seg" at disk range "r".
// The base Log_Record is given a default-constructed Tid, and the record
// takes a fresh stamp from the memory manager.
Seg_Alloc_Record::Seg_Alloc_Record(int seg, Disk_Range r)
    : Log_Record(Tid()),
      segment(seg),
      range(r),
      stamp(or->mm->new_stamp())
{
}

// Nothing to release: the record's members are all held by value.
Seg_Alloc_Record::~Seg_Alloc_Record() {}

// Size reported for this record: the base Log_Record plus the segment
// number and the disk range.  The stamp is left out, matching "encode",
// which does not write it.
int Seg_Alloc_Record::size() {
    const int total = sizeof(Log_Record) + sizeof(segment) + sizeof(range);
    return total;
}

// Write the record to "dev": the tid, the segment number, then the range
// address and count.  Stops at the first field that fails to send and
// returns FALSE; returns TRUE once all four have been sent.
bool Seg_Alloc_Record::encode(Device* dev) {
    // Do not encode the time stamp, it will be re-generated at recover time.
    if (! tid().encode(dev))                 return FALSE;
    if (! dev->send_ubits32(segment))        return FALSE;
    if (! dev->send_ubits32(range.address))  return FALSE;
    if (! dev->send_ubits32(range.count))    return FALSE;
    return TRUE;
}

// Read the record back from "dev" in the order "encode" wrote it: the
// tid, the segment number, then the range address and count.
// Returns FALSE, leaving "segment" and "range" untouched, if any field
// cannot be read; returns TRUE once all of them have been stored.
// The stamp is not part of the encoding and is not restored here.
bool Seg_Alloc_Record::decode(Device* dev) {
    ubits32 seg, addr, count;

    if (! (tid().decode(dev) &&
           dev->recv_ubits32(&seg) &&
           dev->recv_ubits32(&addr) &&
           dev->recv_ubits32(&count)
           ))
        return FALSE;

    segment = seg;
    range.address = addr;
    range.count = count;

    // Report success explicitly: without this the function would flow off
    // its end and hand the caller an indeterminate result.
    return TRUE;
}

// Apply this record to the segment table.  The whole update runs with
// "mm->mutex" held: the segment-to-range mapping is written into the
// table and the record then takes a new stamp (the stamp is never written
// by "encode", so a decoded record has none of its own).
void Seg_Alloc_Record::install() {
    or->mm->mutex->grab();
    // Write the mapping into the leaf that owns "segment".
    or->mm->segtable->install_mapping(segment, range);
    // Stamp the record after the table has been modified.
    stamp = or->mm->new_stamp();
    or->mm->mutex->release();
}

// Add to "set" the segments that have to be written out because of this
// record: always the allocated segment itself, and also the segment-table
// segment holding its mapping when that segment reports pending
// modifications for this record's stamp.
void Seg_Alloc_Record::get_modified_segments(IntSet* set) {
    // The allocated segment has to be written out
    set->insert(segment);

    // A segment table page may also have to be written out.  Look its
    // number up once and use the same value for the cache lookup and the
    // insert below.
    int container_num = or->mm->segtable->container(segment);

    // Locate the segment in the cache
    Segment* container_seg = (Segment*) or->mm->cache->lookup(container_num);
    th_assert(container_seg != 0, "segment table contents are not cached");

    if (container_seg->modified(stamp)) {
        // Container has pending modifications that happened on or before
        // the installation of this record into the container.  Therefore
        // we need to write out the container.
        set->insert(container_num);
    }
}
