// Copyright 1995 Barbara Liskov

/*
\section{Front-End Connection Management}

This file contains the code that manages interactions with a front-end.
We allocate a thread per front-end.  The thread loops, reading
messages from the front-end and sending the appropriate replies back
to the front-end.

This file handles all FE manager functions except the two-phase commit,
which is in commit.cc.

*/

/*
\subsection{Todo}

\begin{itemize}
\item Set-up non-blocking IO so that if the connection to the FE
      gets backed up, other portions of the OR do not get delayed
      waiting for fetched objects to be released.
\item Can TCP streams deadlock if both ends try to write a lot
      of data?  (I am worried about the buffers at both ends filling
      up and neither end reading any data because both are blocked
      waiting to write.)
\end{itemize}
*/

#include "utils/compat.h"

#include <assert.h>
#include <stdio.h>
#include <iostream.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>

#include "utils/fail.h"
#include "utils/Timer.h"
#include "utils/network.h"
#include "utils/samples.h"
#include "utils/th_assert.h"
#include "thread.h"

#include "common/orefs.h"
#include "common/oref_set.h"
#include "common/or_obj.h"

#include "mm/handle.h"
#include "mm/segment.h"
#include "mm/mm.h"
#include "mm/mm_stats.h"

#include "cachedir.h"
#include "fe_manager.h"
#include "fe_recv_message.h"
#include "or_send_message.h"
#include "fe_map.h"
#include "message_stats.h"
#include "pref_set.t"
#include "prefetch_info.h"
#include "or.h"
#include "or_config.h"
#include "tm.h"
#include "rwlock_mutex.h"

#include <sys/uio.h>


// Force template instantiation for Pref_set.
#if __DECCXX
#pragma define_template Buckets<IntKey, Bitmap* >
#pragma define_template BHash<IntKey, Bitmap* , Buckets<IntKey, Bitmap* > >
#pragma define_template BucketsImpl<IntKey, Bitmap*>
#pragma define_template BHashGenerator<IntKey, Bitmap*, Buckets<IntKey, Bitmap* > >
#pragma define_template BucketsGenerator<IntKey, Bitmap*>
#endif

// Globals for statistics
//
// fetch_timer: its elapsed time is folded into total_fetch_time by
// Handle_stat_message(), which then resets it for the next report.
// Where the timer is started/stopped is not visible in this file.

Timer fetch_timer;
// Running sum of fetch_timer readings; reported to the requester of a
// stat message via stat.total_fetch_time.
float total_fetch_time = 0.0;


// The prefetch object set size is limited to be the following
// function of the prefetch size

// \subsection{FE Manager Implementation}

// XXX - We should check whether an old FE is reconnecting.
// If so, we should reuse the corresponding FE_table.


Address manager_to_address(FE_manager *fe) {
    // Free-function accessor so that cachedir can turn an FE_manager
    // pointer into an address without including fe_manager.h.
    // A null manager maps to Address::Error_address.
    if (fe != 0) {
        return fe->net->address;
    }
    return Address::Error_address;
}
   
FE_manager::FE_manager(Network* n, Network* ir_net) {
    // Builds the per-FE state and publishes the new manager.
    //   n      - primary connection to the front-end; writes to it are
    //            serialized through net_wlock.
    //   ir_net - connection used for iread-avoidance requests.  It has no
    //            lock of its own because a single thread performs iread
    //            avoidance.

    // Connections.
    net          = n;
    iread_net    = ir_net;

    // Synchronization objects.
    net_wlock    = new Mutex;
    mutex_commit = new Mutex;
    cond_commit  = new Condition(mutex_commit);

    // Invalidation bookkeeping and transaction counter start from zero.
    invalid_objs    = new Invalid_set;
    current_message = 0;
    max_message     = 0;
    tno             = 0;

    lock = new RW_lock_mutex();

    // Every field is set up by now, so the manager can be made visible
    // to other threads through the FE map.
    orx->fe_map->write_lock();
    orx->fe_map->store1(net->address, this);
    orx->fe_map->write_unlock();

    // Recorded so that a similar segment can be allocated at the backup.
    orx->fe_manager = this;

    // Announce the new connection.
    net->address.print();
    fprintf(stderr, "\n");
}

FE_manager::~FE_manager() {
    // Tears down the state of a front-end whose connection has ended:
    // the FE is first removed from every shared registry that can hand
    // out a pointer to it, and only then are its members freed.

    fprintf(stderr, "FE DIED ");
    net->address.print();
    fprintf(stderr, "\n\n");

    orx->fe_map->write_lock();
    orx->fe_map->remove_fast(net->address);
    orx->fe_map->write_unlock();

    // Deregister from the cache directory while this object is still
    // fully intact.  The cache directory identifies FEs through
    // manager_to_address(), which dereferences fe->net, so this call must
    // precede "delete net" below (it used to come after, i.e. on an
    // object whose members had already been freed).
    orx->cachedir->remove_fe(this);
    printf("removed fe from cachedir\n");

    delete net;
    delete net_wlock;
    delete mutex_commit;
    delete cond_commit;
    delete invalid_objs;
    delete lock;

    // NOTE(review): the constructor stores "this" in orx->fe_manager and
    // nothing here clears it, so that pointer may dangle once this FE is
    // gone -- confirm how orx->fe_manager is used before changing it.

    // XXX should wakeup install thread if waiting for segment from
    // the FE that died.

    // orx->mm->stats->print();
    // msg_stats.print();
}

Address FE_manager::id() const {
    // An FE is identified by the address of its primary connection.
    const Address &self = net->address;
    return self;
}

// File-local timer.  Within this file it is only touched by send_objects()
// (in the disabled "#if 0" section), which stops it just before sending.
static Timer or_fetch_time;

void FE_manager::main() {
    // Service loop for one front-end connection.
    //
    // Each iteration waits (up to one second) for traffic on "net",
    // pushes out any pending invalidations, then reads one message header
    // and hands the rest of the message to the handler for its type.
    // The loop ends when wait_or_timeout() reports a negative status, when
    // a header cannot be read, or when the FE sends an unknown message
    // type; in the last two cases the connection is shut down first.

    // The various timers get activated only if the environment variable
    // THOR_PRINT_TIME has been set
    // static bool printres = getenv("THOR_PRINT_TIME")? TRUE: FALSE;

    // Ignore SIGPIPE so that writing to a connection the FE has closed
    // does not deliver a fatal signal.
    struct sigaction ignore_pipe;
    ignore_pipe.sa_handler = SIG_IGN;
    sigemptyset(&ignore_pipe.sa_mask);
    ignore_pipe.sa_flags = 0;
    sigaction(SIGPIPE, &ignore_pipe, 0);

    // One decoder per message type; they live for the whole connection.
    FE_recv_get_root_msg root_handler;
    FE_recv_fetch_msg fetch_handler(this);
    FE_recv_commit_msg commit_handler(this);
    FE_recv_invalid_ack_msg invalid_ack_handler(this);
    FE_recv_alloc_msg alloc_handler(this);
    FE_recv_debug_msg debug_handler(this);
    FE_recv_trigger_msg trigger_handler;

    float const timeout = 1.0; // wait for at most so many seconds at a time
    int status;
    while ((status = net->wait_or_timeout(timeout)) >= 0) {
        // Send invalidation message if necessary.  This runs on timeouts
        // too, so invalidations go out even when the FE is idle.
        send_invalidation_message_if_needed();
        if (status == 0) continue; // The network timed out.

        // Message header.  Must be compatible with communication at the
        // FE side.
        struct {
            Ubits32 id;
            Ubits32 type;
        } msg;
        if (!net->recv_buffer(&msg, sizeof(msg))) {
            warn("Closing connection: msg id and type not received from fe");
            net->shutdown();
            break;
        }

        Recv_message *rm = NULL;
        switch (msg.type) {
            case FE_get_root_type:    rm = &root_handler; break;
            case FE_fetch_type:       rm = &fetch_handler; break;
            case FE_commit_type:      rm = &commit_handler; tno++; break;
            case FE_invalid_ack_type: rm = &invalid_ack_handler; break;
            case FE_alloc_type:       rm = &alloc_handler; break;
            case FE_debug_type:       rm = &debug_handler; break;
            case FE_trigger_type:     rm = &trigger_handler; break;
            default:                  break; // rejected just below
        }

        if (rm == NULL) {
            // Unknown type: the rest of the stream cannot be interpreted.
            // This is handled outside the switch so that "break" leaves
            // the service loop; a "break" inside the default case only
            // left the switch, and the loop then carried on using a
            // connection that had just been shut down.
            warn("FE sent invalid message type %d\n", msg.type);
            net->shutdown();
            break;
        }

        rm->decode(net, msg.id);
    }
}

void FE_manager::send_invalidation_message_if_needed() {
    // Send any invalid object set information that has not
    // been sent to this frontend yet

    Invalid_set *invset = invalid_objs;
    if (current_message > max_message) {
	// There is invalidation information to be sent to the frontend
	Orefs *inv_orefs = invset->invalidated_since(max_message);
	int size = inv_orefs->size();
	th_assert(size > 0, "Size of invalid orefs is zero at OR");
	msg_stats.or_invalidation++; // XXX not used!
	OR_send_invalidation_msg inv_msg(size, inv_orefs->as_pointer(),
					 max_message+1, current_message);
	Send_net_message(&inv_msg, net, 1000*1000*1000); // XXX arbit. msg id
	delete inv_orefs;
	if (orx->config->debug_level)
	    fprintf(stderr, "Sent Inv message: S: %d E:%d Num: %d\n", 
		    max_message + 1,
		    current_message,
		    size);
	max_message = current_message;
    }
}


void Handle_stat_message(Network *net, Ubits32 msg_id) {
    // Answers a statistics request: gathers figures from the transaction
    // manager and the memory manager, adds the OR clock and the cumulative
    // fetch time, and sends them back on "net" as the reply to "msg_id".
    OR_stat report;

    // Let each module fill in its part of the report.
    orx->tm->stat(report);
    orx->mm->stat(report);

    // OR wall-clock time, converted from nanoseconds to microseconds.
    // NOTE(review): getclock()/TIMEOFDAY are not POSIX -- presumably the
    // Digital UNIX interface; confirm on other platforms.
    struct timespec now;
    getclock(TIMEOFDAY, &now);
    report.clock.seconds = now.tv_sec;
    report.clock.micros  = now.tv_nsec / 1000;

    // Fold the time accumulated since the previous report into the total.
    total_fetch_time += fetch_timer.elapsed();
    report.total_fetch_time = float_to_time(total_fetch_time);

    // cout << "OR STATISTICS STARTED" << endl;
    OR_send_stat_reply_msg reply(&report);
    Send_net_message(&reply, net, msg_id);

    // Start a fresh measurement interval for the next report.
    fetch_timer.reset();
    // cout << "OR STATISTICS END" << endl;
}

#if 0
// This code is excluded from compilation because it has been
// superseded by message types. It is retained so that parts of it can
// be salvaged later.

// \subsection{Message handlers}

void FE_manager::old_send_invalidation_message_if_needed() {
    // Send any invalid object set information that has not
    // been sent to this frontend yet

    lock->write_lock();

    Invalid_set *invset = invalid_objs;
    if (current_message > max_message) {
	// There is invalidation information to be sent to the frontend
	Orefs *inv_orefs = invset->invalidated_since(max_message);
	int size = inv_orefs->size();
	th_assert(size > 0, "Size of invalid orefs is zero at OR");
	msg_stats.or_invalidation++;
	or_message omsg;
	omsg.msgtype = OR_INVALIDATION;
	omsg.u.invalid.count = size;
	omsg.u.invalid.msg_start = max_message + 1;
	omsg.u.invalid.msg_end = current_message;
	MM_HandleList *list = new MM_HandleList;
	if (orx->config->send_updates) {
	    // append values of invalidated objects that are cached
	    for (int i = 0; i < inv_orefs->size(); i++) {
		Oref o = (*inv_orefs)[i];
		MM_Handle *h = orx->mm->fast_fetch(o);
		if (h == 0) continue; // object not found in memory
		list->append(h);
		int segnum = OREF_SEGMENT(o);
		CacheDir *cd = orx->mm->cachedir;
		if (cd->lookup(segnum, fe_id) == CacheDir::reparable) 
		    cd->enter(segnum, fe_id, CacheDir::complete);
	    }
	    if (orx->config->debug_coop)
		cerr << "Sending " << list->size() << " updates." << endl;
	} 
	omsg.u.invalid.updates = list->size();
	net_wlock->grab(); {
	    omsg.encode(net);
	    int bytes = net->bytes_sent;
	    net->send_buffer((caddr_t) inv_orefs->as_pointer(), 
			     size*sizeof(Oref));
	    send_objects(net, list);
	    msg_stats.or_invalidation_bytes += net->bytes_sent-bytes;
	    net->flush();
	} net_wlock->release();

	orx->mm->release_list(list);
	delete list;
	delete inv_orefs;
	if (orx->config->debug_level)
	    fprintf(stderr, "Sent Inv message: S: %d E:%d Num orefs: %d\n", 
		    max_message + 1,
		    current_message,
		    size);
	max_message = current_message;

    }

    lock->write_unlock();
}

void FE_manager::handle_stat_message() {
    or_message  reply;
    or_stat     stat;

    // Fill in the "stat" structure by asking various modules.
    orx->tm->stat(stat);
    orx->mm->stat(stat);

    // Now get the OR wall-clock time.
    struct timespec t;
    getclock(TIMEOFDAY, &t);
    stat.clock.seconds = t.tv_sec;
    stat.clock.micros  = t.tv_nsec / 1000;

    reply.msgtype = OR_STAT;
    net_wlock->grab(); {
	reply.encode(net);
	stat.encode(net);
	net->flush();
    } net_wlock->release();

    // Reset commit timers
    cumm_total_timer.reset();
    cumm_recv_timer.reset();
    cumm_send_timer.reset();
    cumm_validate_timer.reset();

    // Also Print the data about FETCH and IREADS
    cout << "OR STATISTICS STARTED" << endl;
    fetch_stats->print();
    orx->mm->stats->print();
    msg_stats.print();
    cout << "OR STATISTICS END" << endl;

    // Reset statistics for the next run
    fetch_stats->reset();
    orx->mm->stats->reset();
    msg_stats.reset();
}


// Legacy fetch path (disabled).  Serves the fetch of the segment that
// contains "requested" by trying a cascade of strategies, each gated by a
// configuration flag, and stopping at the first that applies:
//   1. send only the modifications (the FE holds a reparable copy),
//   2. send the segment via send_segment(segnum, TRUE),
//   3. forward the request to an FE holding a complete copy,
//   4. forward the request, plus modifications, to an FE holding a
//      reparable copy,
//   5. send the segment via send_segment(segnum, FALSE),
//   6. otherwise reply that the fetch failed.
// On success the cache directory entry for this FE is set to complete.
void FE_manager::fetch() {

    int segnum = OREF_SEGMENT(requested);
    CacheDir *cd = orx->mm->cachedir;
    CacheDir::State state = cd->lookup(segnum, id());

    // clean some shared state for this fetch attempt
    lock->write_lock(); {
	fetch_denied = FALSE;
	helper = id(); // so that does not match with others
    } lock->write_unlock();

    // If the FE has a reparable segment, just send the modifications.
    if (orx->config->fetch_mods && state == CacheDir::reparable) {
	fetch_stats->mods++;
	send_mods(segnum);
	cd->enter(segnum, id(), CacheDir::complete);
	return;
    }

    // If the server has segment in cache, apply modifications and send it.
    if (orx->config->fetch_cache && send_segment(segnum, TRUE)) {
	fetch_stats->cache++;
	cd->enter(segnum, id(), CacheDir::complete);
	return;
    }
    
    // If another client has the complete segment, forward the request.
    // NOTE(review): forward_fetch() returns FALSE when the helper is no
    // longer in the FE map, but its result is ignored here and below; in
    // that case this FE is still recorded as complete and nothing is sent
    // to it -- confirm whether that is intended.
    fe_num helper_;
    if (orx->config->fetch_complete && 
	cd->search(segnum, CacheDir::complete, tried_fes, helper_)) {
	fetch_stats->forward++;
	lock->write_lock(); { 
	    helper = helper_; // others should not read helper during update 
	} lock->write_unlock();
	// NOTE(review): "helper" is read here after the lock was released.
	forward_fetch(segnum, helper, id(), request_id, FALSE);
	cd->enter(segnum, id(), CacheDir::complete);
	return;
    }

    // If another client has a reparable segment, forward the request and mods.
    if (orx->config->fetch_reparable && 
	cd->search(segnum, CacheDir::reparable, tried_fes, helper_)) {
	fetch_stats->forward_mods++;
	lock->write_lock(); {
	    helper = helper_;
	} lock->write_unlock();
	forward_fetch(segnum, helper, id(), request_id, TRUE);
	cd->enter(segnum, id(), CacheDir::complete);
	cd->enter(segnum, helper, CacheDir::complete); // helper is complete
	return;
    }
	
    // For statistics, check if there is any FE with unreparable segment
    // (such segments cannot be used to serve fetches) 
    if (cd->search(segnum, CacheDir::unreparable, tried_fes, helper_)) {
	fetch_stats->unreparable++;
    }

    // If the server has segment in disk, apply modifications and send it.
    if (orx->config->fetch_disk && send_segment(segnum, FALSE)) {
	fetch_stats->disk++;
	cd->enter(segnum, id(), CacheDir::complete);
	return;
    }

    // Segment not found.
    send_fetchfailed(requested);
}


void FE_manager::forward_fetch_denied (Address requester_num, int req_id) {
    // Legacy handler (disabled).  Called on the manager of an FE that
    // refused a fetch forwarded to it on behalf of "requester_num".
    //   requester_num - address of the FE whose fetch was forwarded
    //   req_id        - request id that was carried by the forwarded fetch
    // If the denial matches the requester's current fetch, the requester
    // is flagged as denied, this FE is added to the set of FEs already
    // tried, and this FE is recorded as not caching the segment.

    // debugging msg
    if (orx->config->debug_coop)
	cerr << "FE_FETCH_DENIED by " << fe_id
	     << " for " << requester_num 
	     << " : req_id=" << req_id
	     << endl;
    // update statistics
    fetch_stats->forward_denied++;

    // Look the requester up under the FE map's read lock.  The null check
    // was missing here, which left the braces unbalanced and made the
    // unlock-and-return unconditional; it now follows the same pattern as
    // the helper lookup in forward_fetch().
    // NOTE(review): the lookup goes through an unqualified "fe_map" while
    // the lock is taken on orx->fe_map -- confirm they are the same map.
    FE_manager *requester;
    orx->fe_map->read_lock(); {
	requester = fe_map->lookup(requester_num);
	if (!requester) {
	    orx->fe_map->read_unlock();
	    return; // requester disappeared
	}
    } orx->fe_map->read_unlock();

    // NOTE(review): "requester" is used after the map lock is released;
    // confirm that it cannot be destroyed in the meantime.
    requester->lock->write_lock(); {	
	if (requester->helper == id() && 
	    requester->request_id == req_id) {
	    // applies to the current fetch at the requester
	    requester->fetch_denied = TRUE;
	    requester->tried_fes->insert(id());
	    orx->mm->cachedir->enter(OREF_SEGMENT(requester->requested), 
				     id(), CacheDir::absent);
	}
    } requester->lock->write_unlock();
}

void FE_manager::send_mods(int segnum) {
    // collect modified objects in list
    
    msg_stats.or_fetch_ok++;
    MM_HandleList *list = new MM_HandleList;
    orx->mm->mods_prefetch(segnum, list);
    or_message omsg;
    omsg.msgtype = OR_FETCH_OK;
    omsg.u.fetch.or = orx->config->ornum;
    omsg.u.fetch.fullSegment = FALSE;
    omsg.u.fetch.u.number = list->size();
    omsg.u.fetch.request_id = request_id;
    net_wlock->grab(); {
	omsg.encode(net);
	int bytes_sent = net->bytes_sent;
	send_objects(net, list);
	msg_stats.or_fetch_ok_bytes += net->bytes_sent - bytes_sent;
	net->flush();
    } net_wlock->release();
    orx->mm->release_list(list);
    delete list;
}
    

bool FE_manager::send_segment(int segnum, bool fast) {
    // Legacy helper (disabled): fetches segment "segnum" from the memory
    // manager and, if it is available, sends it to the FE as a
    // full-segment OR_FETCH_OK reply to the current request.
    //   fast - passed through to MM::fetch_segment(); fetch() passes TRUE
    //          for its "segment in cache" step and FALSE for its "segment
    //          in disk" step.
    // Returns TRUE if the segment was sent, FALSE if it was not obtained
    // (nothing is sent in that case).

    // The time spent obtaining the segment is accounted as disk time.
    stat_timer.reset();
    stat_timer.start();
    Segment *seg = orx->mm->fetch_segment(segnum, fast);
    stat_timer.stop();
    fetch_stats->disk_time += stat_timer.elapsed();
    if (seg == 0) return FALSE;

    // Count the reply only once it is certain that one will be sent.
    // This used to be incremented on entry, so every failed attempt
    // (fetch() may try this function twice) was counted as a FETCH_OK.
    msg_stats.or_fetch_ok++;

    or_message omsg;
    omsg.msgtype = OR_FETCH_OK;
    omsg.u.fetch.or = orx->config->ornum;
    omsg.u.fetch.fullSegment = TRUE;
    omsg.u.fetch.u.segnum = seg->id();
    omsg.u.fetch.request_id = request_id;
    net_wlock->grab(); {
	omsg.encode(net);
	int bytes_sent = net->bytes_sent;
	seg->send(net);
	msg_stats.or_fetch_ok_bytes += net->bytes_sent - bytes_sent;
	net->flush();
    } net_wlock->release();

    orx->mm->mutex->grab(); {
	seg->unpin(); // for pin added in fetch_segment
    } orx->mm->mutex->release();
    return TRUE;
}


void FE_manager::send_fetchfailed(Oref o) {
    // Legacy helper (disabled): logs the failure and tells the FE, with an
    // OR_FETCH_DENY message naming "o", that the fetch could not be served.
    fprintf(stderr, "fetch failed: %d:%d\n", OREF_SEGMENT(o), OREF_INDEX(o));

    or_message denial;
    denial.msgtype = OR_FETCH_DENY;
    denial.u.denied = o;

    // Written under the connection's write lock.
    net_wlock->grab();
    denial.encode(net);
    net->flush();
    net_wlock->release();
}    

void FE_manager::drop_segments(int num) {
    int segnums[MAX_DROPPED_SEGS];
    net->recv_buffer(segnums, num*sizeof(int));
    for (int i = 0; i < num; i++)
	orx->mm->cachedir->enter(segnums[i], id(), CacheDir::absent);
}

void FE_manager::handle_coop_fetch_denied() {
    // Legacy handler (disabled): if the current fetch has been flagged as
    // denied, record the requested segment as absent for id() in the
    // cache directory and run the fetch again.
    if (!fetch_denied) return;

    // Helper doesn't have the segment cached.
    int segnum = OREF_SEGMENT(requested);
    orx->mm->cachedir->enter(segnum, id(), CacheDir::absent);

    fetch(); // retry the fetch
}

bool forward_fetch (int segnum, Address helper, Address requester, 
		    int request_id, bool append_mods) {

    // Debugging message
    if (orx->config->debug_coop) {
	cerr << "Forward seg " << segnum << " from " << helper;
	if (requester.is_zero()) cerr << " to OR";
	else cerr << " to " << requester;
	cerr << " : req_id=" << request_id;
	if (append_mods) cerr << " : mods sent";
	cerr << endl;
    }

    FE_manager *helper_fe = NULL;
    orx->fe_map->read_lock(); {
	helper_fe = fe_map->lookup(helper);
	if (!helper_fe) {
	    cerr << "helper disappeared" << endl;
	    orx->fe_map->read_unlock();
            return FALSE;
	}
    } orx->fe_map->read_unlock();
    
    or_message omsg;
    omsg.msgtype =  OR_FORWARDED_FETCH;
    omsg.u.forwarded_fetch.requester = requester.structify();
    omsg.u.forwarded_fetch.request_id = request_id;
    omsg.u.forwarded_fetch.or_num = orx->config->ornum;
    omsg.u.forwarded_fetch.seg_num = segnum;
    
    MM_HandleList *list = new MM_HandleList;
    if (append_mods) orx->mm->mods_prefetch(segnum, list);

    omsg.u.forwarded_fetch.num_mods = list->size();

    // If the requester is null this is an iread avoidance request.
    // send request in special iread channel.
    Network *send_net = 
      (requester.is_zero()) ? helper_fe->iread_net : helper_fe->net;
      
    msg_stats.or_forwarded_fetch++;
    // send msg on the helpers\'s connection
    helper_fe->net_wlock->grab(); {
      omsg.encode(send_net);
      int bytes = send_net->bytes_sent;
      send_objects(send_net, list);
      msg_stats.or_forwarded_fetch_bytes += send_net->bytes_sent - bytes;
      send_net->flush();
    } helper_fe->net_wlock->release();
   
    orx->mm->release_list(list);
    delete list;
    return TRUE;
}

void send_objects(Network *net, MM_HandleList *list) {
    // Legacy helper (disabled): writes the objects in "list" to "net" with
    // one vectored send.  Entry 0 of the vector is an array holding one
    // descriptor (oref and size) per object; entries 1..n are the objects
    // themselves, in list order.  An empty list sends nothing.
    const int count = list->size();
    if (count == 0) return;

    iovec *vec = new iovec[count + 1];
    or_objdesc *descs = new or_objdesc[count];

    /* The descriptor array goes first */
    vec[0].iov_base = (caddr_t) descs;
    vec[0].iov_len  = count * sizeof(or_objdesc);

    /* One descriptor and one vector entry per object */
    for (int k = 0; k < count; k++) {
	MM_Handle* handle = list->slot(k);
	OR_obj *body = handle->obj();
	int slots = OR_obj_full_size(body);

	descs[k].o = handle->oref();
	descs[k].objsize = slots;

	iovec *entry = &vec[k + 1];
	entry->iov_base = (caddr_t) body;
	entry->iov_len = slots*sizeof(OR_slot);
    }

    /* Now send the collected data */
    or_fetch_time.stop();
    if (!net->send_vector(vec, count + 1))
	th_fail("Error sending objects to FE\n");

    delete [] descs;
    delete [] vec;
}

// Definition of the static fetch_stats member of FE_manager; the
// Fetch_Stats constructor zeroes the counters via reset().
Fetch_Stats* FE_manager::fetch_stats = new Fetch_Stats();

Fetch_Stats::Fetch_Stats() {
    // A new statistics object starts with every counter cleared.
    this->reset();
}

void Fetch_Stats::reset() {
    // Clears every fetch counter ...
    total = mods = cache = 0;
    forward = forward_mods = forward_denied = 0;
    disk = unreparable = 0;

    // ... and both accumulated times.
    disk_time = tot_fetch_time = 0.0;
}

void Fetch_Stats::print() {
    // Writes every counter to cout, one labelled line each, followed by
    // a reminder of the invariant that relates them.
    cout << "FETCH STATISTICS (cummulative) " << endl
	 << "total= " << total << endl
	 << "served from mob (segment not sent) = " << mods << endl
	 << "served from segment cache = " << cache << endl;

    // Forwarding outcomes.
    cout << "requests forwarded to FEs with complete segments (mods not sent) = "
	 << forward << endl
	 << "requests forwarded to FEs with reparable segments (mods sent) = " 
	 << forward_mods << endl
	 << "requests denied = " << forward_denied << endl
	 << "fetches not forwarded because the only cached segments were unreparable = "
	 << unreparable << endl;

    // Disk service and timing.
    cout << "served from disk = " << disk << endl
	 << "Time spent waiting for disk = " << disk_time << endl
	 << "Total time spent handling fetches = " << tot_fetch_time << endl;

    cout << "Invariant: total=mods+cache+forwarded+forwarded_mods-denied+disk"
	 << endl;
}

#endif /* end of if 0 */
