/*=============================================================================
ttyrpld - TTY replay daemon
user/replay.c - Realtime TTY log analyzer
  Copyright (C) Jan Engelhardt <jengelh [at] linux01 gwdg de>, 2004
  -- License restrictions apply (GPL2)

  This file is part of ttyrpld.
  ttyrpld is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by
  the Free Software Foundation; however ONLY version 2 of the License.

  ttyrpld is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program kit; if not, write to:
  Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.

  -- For details see doc/GPL2.txt.
=============================================================================*/
#include <sys/stat.h>
#include <sys/time.h> // usec def, gettimeofday()
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <popt.h>
#include <sched.h> // sched_yield()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h> // nsec def, nanosleep()
#include <unistd.h>

#include "dev.h"
#include "rpl_packet.h"
#include "ushared.h"

/* Byte offset of MEMBER inside TYPE, obtained by taking the member's address
relative to a null base pointer. */
#define offsetof(type, member) ((size_t)&((type *)NULL)->member)
/* Given VAR, a pointer to MEMBER embedded in a TYPE object, step back to the
start of the enclosing object.
NOTE(review): this subtracts from a "void *", which relies on a compiler
extension -- confirm the set of supported compilers. */
#define containerof(var, type, member) ((void *)(var) - offsetof(type, member))
// Microseconds per second; used for all struct timeval arithmetic below.
#define MICROSECOND 1000000
// Nanoseconds per second.
#define NANOSECOND  1000000000
// Size of the on-stack scratch buffers used by the read helpers.
#define BUFSIZE     4096

/* Follow-mode state kept in Opt.follow. With FM_NONE (the default) a replay
ends at end of file; the other two states make the reader wait for data. */
enum { // current follow mode state:
    FM_NONE    = 0,
    FM_CATCHUP = 1, // playing old content
    FM_LIVE    = 2, // already hit EOF once and waiting for new data
};

/* Kind of seek request kept in Opt.seek_to.type. */
enum { // skip types
    SKIP_NONE = 0, // no seek requested (default)
    SKIP_WRPACKET, // selected by -j: skip a number of EV_WRITE packets
    SKIP_TIME,     // selected by -J: seek to a time relative to the log start
};

/* Program-wide settings. get_options() fills them in from the command line
and main() stores the measured sleep overhead in "ovcorr". */
struct {
    // Speed factor (-S); replay_file() divides each inter-packet delay by it.
    double factor;
    // One of the FM_* follow-mode states (-f / -F).
    int follow;
    // Average nanosleep() overhead in microseconds, as returned by
    // calc_ovcorr(); consumed by usleep_ovcorr().
    long ovcorr;
    struct {
        // Numeric argument of -j.
        long val;
        // One of the SKIP_* kinds.
        int type;
    } seek_to;
} Opt = {
    .factor   = 1.0,
    .follow   = 0,
    .seek_to  = {
        .type = SKIP_NONE,
    },
};

/* Forward declarations for everything defined below main(). */
static void replay_file(int fd);

static unsigned long calc_ovcorr(unsigned long ad, int rd)
 __attribute__((unused));
static int find_next_packet(int fd);
static int get_options(int *argc, const char ***argv);
static inline int imin(int a, int b);
static ssize_t read_null(int fd, size_t count);
static ssize_t read_through(int in, int out, size_t count);
static ssize_t read_wait(int fd, void *buf, size_t count);
static ssize_t read_waitfm(int fd, void *buf, size_t count, int *follow);
static int seek_to_end(int fd);
static int usleep_std(struct timeval *req, long *skew);
static int usleep_ovcorr(struct timeval *req, long *skew);
static void tv_delta(const struct timeval *past, const struct timeval *now,
 struct timeval *dest);
static inline unsigned long long tv2usec(const struct timeval *req);
static inline void usec2tv(unsigned long long usec, struct timeval *req);

/* Sleep routine used between packets; the overhead-correcting variant is the
default and get_options() may replace it. */
static int (*usleep_chosen)(struct timeval *req, long *skew) = usleep_ovcorr;

//-----------------------------------------------------------------------------
/* Entry point: parses the options, prints the license banner, measures the
sleep overhead and then replays every file named on the command line in
order. Files that cannot be opened are reported on stderr and skipped. */
int main(int argc, char **argv) {
    static const char banner[] =
      "> ttyreplay\n"
      "This program comes with ABSOLUTELY NO WARRANTY; it is free software\n"
      "and you are welcome to redistribute it under certain conditions;\n"
      "for details see the doc/GPL2.txt file which should have come with\n"
      "this program.\n\n";

    // On success, get_options() replaces argv by the NULL-terminated list of
    // the remaining (non-option) arguments.
    if(!get_options(&argc, (const char ***)&argv)) {
        return EXIT_FAILURE;
    }

    fputs(banner, stdout);

    Opt.ovcorr = calc_ovcorr(0, 100);

    for(; *argv != NULL; ++argv) {
        int fd = open(*argv, O_RDONLY);

        if(fd < 0) {
            fprintf(stderr, "Could not open %s: %s\n",
             *argv, strerror(errno));
            continue;
        }

        replay_file(fd);
        close(fd);
    }

    return EXIT_SUCCESS;
}

/* Replays the log stream available on FD. For every packet the header is
read, the recorded pause since the previous packet is reproduced (divided by
Opt.factor), and then the payload is handled: EV_WRITE data is copied to
standard output, the payload of any other event is read and thrown away.
The seek settings in Opt.seek_to are not acted upon here. */
static void replay_file(int fd) {
    struct log_packet packet;
    struct timeval stamp;  // time stamp of the previously handled packet
    ssize_t nread;
    long skew = 0;         // bookkeeping owned by the chosen sleep routine
    int have_stamp = 0;    // zero until "stamp" holds a usable value

    if(Opt.follow == FM_LIVE && !seek_to_end(fd)) {
        return;
    }

    for(;;) {
        struct timeval delta;

        nread = read_waitfm(fd, &packet, sizeof(struct log_packet),
         &Opt.follow);
        if(nread != sizeof(struct log_packet)) {
            break;
        }

        if(packet.p.magic != MAGIC_SIG) {
            fprintf(stderr, "\n" "<Packet inconsistency! "
             "Trying to find next valid packet.>\n");
            have_stamp = 0;
            /* A read() error inside find_next_packet() is not handled here;
            it shows up again when the next header is read at the top of the
            loop. */
            find_next_packet(fd);
            continue;
        }

        if(have_stamp && Opt.follow != FM_LIVE) {
            /* The pause can only be computed once two consecutive time
            stamps are known, hence nothing happens for the first packet.
            In FM_LIVE mode the waiting is already done by read_wait();
            sleeping here as well would introduce a skew. */
            tv_delta(&stamp, &packet.tv, &delta);
            if(Opt.factor != 1.0) {
                usec2tv(tv2usec(&delta) / Opt.factor, &delta);
            }
            usleep_chosen(&delta, &skew);
        }
        have_stamp = 1;

        memcpy(&stamp, &packet.tv, sizeof(struct timeval));

        if(packet.p.event == EV_WRITE) {
            read_through(fd, STDOUT_FILENO, packet.p.size);
        } else {
            if(packet.p.event == EV_CLOSE) {
                fprintf(stderr, "\n" "<tty device has been closed>\n");
            }
            // Every non-write event still carries p.size payload bytes.
            read_null(fd, packet.p.size);
        }
    }

    if(nread < 0) {
        perror("\n" "<Error while reading from stream>\n" "read() returned:");
    }

    printf("\n" "<Log replaying finished>\n");
}

//-----------------------------------------------------------------------------
/* Measures how long a nanosleep() request of AD nanoseconds really takes.
The request is issued RD times and the wall-clock duration of each call is
taken with gettimeofday().

Returns the average duration in microseconds, or 0 when RD is not positive
(no measurement is possible then). Progress and the result are printed on
stderr. */
static unsigned long calc_ovcorr(unsigned long ad, int rd) {
    struct timespec s = {.tv_sec = 0, .tv_nsec = ad};
    struct timeval start, stop;
    unsigned long av = 0;
    int count;

    fprintf(stderr, "Calculating average overhead...");

    if(rd <= 0) {
        // Avoid the division by zero (and a runaway countdown) below.
        fprintf(stderr, " %lu us\n", av);
        return 0;
    }

    for(count = 0; count < rd; ++count) {
        gettimeofday(&start, NULL);
        nanosleep(&s, NULL);
        gettimeofday(&stop, NULL);
        av += MICROSECOND * (stop.tv_sec - start.tv_sec) +
         stop.tv_usec - start.tv_usec;
    }

    av /= rd;
    // The value is in microseconds, so label it as such.
    fprintf(stderr, " %lu us\n", av);
    return av;
}

/* Resynchronizes the stream on FD after packet alignment was lost.

A window of two header sizes (BZ) is scanned for the magic byte. A candidate
header is shifted to the start of the window, its p.size field is used to
jump to where the following header should be, and the magic byte is checked
at that position. Two such checks have to succeed in a row before the
position is trusted. Finally the descriptor is advanced so that the next
read() starts at a packet header.

All data consumed while searching is dropped without being displayed.
The return values of the read helpers are not examined in here.
Always returns 1. */
static int find_next_packet(int fd) {
#define LZ           sizeof(struct log_packet)
#define BZ           (2 * LZ)
#define MAGIC_OFFSET offsetof(struct log_packet, p.magic)
    char buf[BZ];
    struct log_packet *packet = (void *)buf;
    size_t s;
    int ok = 0; // number of consecutive successful boundary checks

    read_wait(fd, buf, BZ);

    while(1) {
        char *ptr;

        /* Indeed, the many read() calls get more and more data from the
        stream without displaying it. Anyway, if we found a way into this
        function, there is a reason to it. */

        if((ptr = memchr(buf, MAGIC_SIG, BZ)) != NULL &&
         ptr - buf >= MAGIC_OFFSET) {
            // A magic byte has been found and the packet start is complete
            char *ctx = containerof(ptr, struct log_packet, p.magic);
            if(ctx != buf) {
                // Shift the candidate header to the front and top up the rest
                size_t cnt = buf + BZ - ctx;
                ctx = memmove(buf, ctx, cnt);
                read_wait(fd, buf + cnt, BZ - cnt);
            }
        } else if(ptr != NULL) {
            /* Magic byte, but of no use. Discard it, and fill up with new
            data from the descriptor. */
            size_t cnt = buf + BZ - ptr - 1;
            memmove(buf, ptr + 1, cnt);
            read_wait(fd, buf + cnt, BZ - cnt);
            continue;
        } else {
            /* No magic byte, but since it might be just the next byte in
            the stream, only read LZ bytes. */
            memmove(buf, buf + BZ - LZ, BZ - LZ);
            read_wait(fd, buf + LZ, BZ - LZ);
            continue;
        }

        s = packet->p.size;
        if(s > 4096) {
            /* The default tty buffer size is 4096, and any size above this is
            questionable at all. Start with a new slate. */
            ok = 0;
            memmove(buf, buf + LZ, BZ - LZ);
            read_wait(fd, buf, BZ);
            continue;
        }

        if(s < LZ) {
            /* The following header starts inside the window: move it to the
            front and read whatever is missing to fill the window again. */
            memmove(buf, buf + LZ + s, BZ - LZ - s);
            read_wait(fd, buf + BZ - LZ - s, LZ + s);
        } else {
            /* There is no header (according to p.size) in our buffer, so we
            can blindly munge lots of data. */
            read_null(fd, s - LZ);
            read_wait(fd, buf, BZ);
        }

        if((ptr = memchr(buf, MAGIC_SIG, MAGIC_OFFSET + 1)) == NULL ||
         ptr - buf != MAGIC_OFFSET) {
            /* If the size field does not match up with the next magic byte,
            drop it all. */
            ok = 0;
            continue;
        }

        if(++ok >= 2) { break; }
    }

    fprintf(stderr, "\n" "<Found packet boundary>\n");

    // Finally adjust the read pointer to a packet boundary
    /* The threshold is the header size LZ, exactly as in the loop above: only
    a payload shorter than LZ leaves the start of another header in the
    window. A fixed number here would let "s - LZ" wrap around whenever the
    header size differs from it. */
    if((s = packet->p.size) < LZ) {
        /* Mmhkay, there is another header other than the current (packet)
        in the buffer. Crap, another one gone. */
        memmove(buf, buf + LZ + s, BZ - LZ - s);
        // Complete that header, then skip the payload it announces
        read_wait(fd, buf + BZ - LZ - s, s);
        read_null(fd, packet->p.size);
    } else {
        // Just subtract what we have already read into the buffer
        read_null(fd, s - LZ);
    }

    return 1;
#undef BZ
#undef LZ
#undef MAGIC_OFFSET
}

/* Parses the command line with popt and stores the result in Opt and
usleep_chosen.

On success, *argc and *argv are replaced by the remaining non-option
arguments (a NULL-terminated vector, possibly empty) and 1 is returned.
On a parse error or when the argument vector cannot be copied, a message is
printed on stderr and 0 is returned. */
static int get_options(int *argc, const char ***argv) {
    static const char *_empty_argv[] = {NULL, NULL};
    struct poptOption options_table[] = {
#define LONGER_OPTION(a, b) (((a) << 8) | (b))
        {"std", 0, POPT_ARG_NONE, NULL, LONGER_OPTION('-', 's'),
         "Do not use sleep() overhead correction", NULL},
        {NULL, 'F', POPT_ARG_NONE, NULL, 'F',
         "Live feed follow mode (like `tail -f`)", NULL},
        /* NOTE(review): the argument is a string, yet the storage handed to
        popt is the whole seek_to structure -- presumably the time
        specification is meant to be parsed into seek_to.val; confirm before
        relying on this option. */
        {NULL, 'J', POPT_ARG_STRING, &Opt.seek_to, 'J',
         "Seek to time relative to the start of the log"
         " (HH:MM:SS or total SEC)", "tspec"},
        {NULL, 'S', POPT_ARG_DOUBLE, &Opt.factor, '\0',
         "Speed factor (default: 1.0)", NULL},
        {NULL, 'f', POPT_ARG_NONE, NULL, 'f',
         "Catch-up follow mode (play file, switch to live feed on EOF)", NULL},
        {NULL, 'j', POPT_ARG_LONG, &Opt.seek_to.val, 'j',
         "Skip the given number of EV_WRITE packets", NULL},
        POPT_AUTOHELP
        POPT_TABLEEND
    };

    poptContext ctx;
    const char **args;
    int c, argk;

    ctx = poptGetContext(**argv, *argc, *argv, options_table, 0);
    while((c = poptGetNextOpt(ctx)) >= 0) {
        switch(c) {
            case LONGER_OPTION('-', 's'):
                /* Don't use it, it just does not work very nice with high -S
                speeds and short delays. */
                usleep_chosen = usleep_std;
                break;
            case 'F':
                Opt.follow = FM_LIVE;
                break;
            case 'J':
                Opt.seek_to.type = SKIP_TIME;
                // Seeking must not turn on follow mode as a side effect.
                break;
            case 'f':
                Opt.follow = FM_CATCHUP;
                break;
            case 'j':
                Opt.seek_to.type = SKIP_WRPACKET;
                break;
            default:
                break;
        }
    }

    // -1 means "all options consumed"; anything lower is a popt error code.
    if(c < -1) {
        fprintf(stderr, "%s: %s\n", poptBadOption(ctx, 0), poptStrerror(c));
        poptFreeContext(ctx);
        return 0;
    }

    if((args = poptGetArgs(ctx)) != NULL) {
        int rc;

        /* The leftover arguments belong to the context, so take a copy
        before the context is freed. */
        argk = SH_count_args(args);
        if((rc = poptDupArgv(argk, args, argc, argv)) != 0) {
            /* Without this check the caller would go on with the untouched
            original vector, program name and options included. */
            fprintf(stderr, "Could not copy the argument list: %s\n",
             poptStrerror(rc));
            poptFreeContext(ctx);
            return 0;
        }
    } else {
        *argc = 0;
        *argv = _empty_argv;
    }

    poptFreeContext(ctx);
    return 1;
#undef LONGER_OPTION
}

/* Returns the smaller of the two integers A and B. */
inline static int imin(int a, int b) {
    if(b <= a) {
        return b;
    }
    return a;
}

/* Reads COUNT bytes from FD and throws them away. This serves as a
replacement for lseek() on descriptors that cannot seek.

A read() returning 0 (end of file) is retried after yielding the CPU, so the
call only returns once all bytes were consumed or an error occurred.

Returns COUNT on success and 0 if read() failed. */
static ssize_t read_null(int fd, size_t count) {
    char buf[BUFSIZE];
    size_t rem = count;

    while(rem > 0) {
        /* Keep the chunk size in size_t. Squeezing "rem" through an int can
        turn a large value negative, which read() would then take as a huge
        length for this small buffer. */
        size_t chunk = (rem < sizeof(buf)) ? rem : sizeof(buf);
        ssize_t eax = read(fd, buf, chunk);

        if(eax < 0) {
            if(errno == EINTR) { continue; } // interrupted, nothing was lost
            return 0;
        }

        rem -= eax;
        if(rem > 0) { sched_yield(); }
    }

    return count;
}

/* Copies COUNT bytes from the descriptor IN to the descriptor OUT, similar
to sendfile().

A read() returning 0 (end of file) is retried after yielding the CPU. Every
chunk is written completely, i.e. short writes are continued. If writing
fails, the remaining input is still consumed (so that the position in IN
stays where the caller expects it) but no longer written.

Returns COUNT on success and 0 if read() or write() failed. */
static ssize_t read_through(int in, int out, size_t count) {
    char buf[BUFSIZE];
    size_t rem = count;
    int write_failed = 0;

    while(rem > 0) {
        /* Keep the chunk size in size_t. Squeezing "rem" through an int can
        turn a large value negative, which read() would then take as a huge
        length for this small buffer. */
        size_t chunk = (rem < sizeof(buf)) ? rem : sizeof(buf);
        ssize_t got = read(in, buf, chunk);
        ssize_t done = 0;

        if(got < 0) {
            if(errno == EINTR) { continue; } // interrupted, nothing was lost
            return 0;
        }

        while(!write_failed && done < got) {
            ssize_t put = write(out, buf + done, got - done);
            if(put < 0) {
                if(errno == EINTR) { continue; }
                write_failed = 1;
                break;
            }
            done += put;
        }

        rem -= got;
        if(rem > 0) { sched_yield(); }
    }

    return write_failed ? 0 : (ssize_t)count;
}

/* A wrapper for read() which guarantees that all COUNT requested bytes are
in BUF when it returns successfully.

Note that it retries when it hits end of file (yielding the CPU in between),
so only use this on files which are still being written to!

Returns COUNT on success and 0 if read() failed. */
static ssize_t read_wait(int fd, void *buf, size_t count) {
    char *dst = buf; // byte-wise cursor; "void *" cannot be advanced portably
    size_t rem = count;

    while(rem > 0) {
        ssize_t eax = read(fd, dst, rem);

        if(eax < 0) {
            if(errno == EINTR) { continue; } // interrupted, nothing was lost
            return 0;
        }

        dst += eax;
        rem -= eax;
        if(rem > 0) {
            sched_yield(); // usleep(1) could also be a possibility
        }
    }

    return count;
}

static ssize_t read_waitfm(int fd, void *buf, size_t count, int *follow) {
    /* Wrapper function for either read() or read_wait(). If EOF has been
    detected (ATM), we switch into FM_LIVE mode (if currently in FM_CATCHUP
    mode). */
    struct stat sb;
    off_t pos;

    if(!*follow) {
        /* If no follow mode is selected, the while() loop in replay_file()
        shall terminate as soon as we encounter EOF or have a short read (i.e.
        read less then conut). */
        return read(fd, buf, count);
    }

    /* If it is a live feed (follow mode), complete the read (i.e. read all
    requested bytes. */
    if(*follow != FM_LIVE && fstat(fd, &sb) == 0 &&
     (pos = lseek(fd, 0, SEEK_CUR)) != 0 && pos == sb.st_size) {
        fprintf(stderr, "\n" "<Switching to live feed follow mode>\n");
        *follow = FM_LIVE;
    }

    return read_wait(fd, buf, count);
}

/* Moves the read position of FD to the end of the currently available data
and then synchronizes on the next packet with find_next_packet().

If FD cannot seek (ESPIPE), it is temporarily switched to non-blocking mode
and drained until a read would block.

Returns the result of find_next_packet(), or 0 if draining failed or the
stream ended while draining. */
static int seek_to_end(int fd) {
    if(lseek(fd, 0, SEEK_END) == -1 && errno == ESPIPE) {
        // Some workaround for non-seekable descriptors
        char buf[BUFSIZE];
        ssize_t got;

        fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);

        fprintf(stderr,
          "Reading from something that does not support seeking (a pipe?),\n"
          "skipping to a position where reading would block\n"
        );

        do {
            got = read(fd, buf, BUFSIZE);
        } while(got > 0 || (got < 0 && errno == EINTR));

        if(got == 0) {
            /* End of stream. read() does not set errno in this case, so
            looking at errno would only pick up a stale value. */
            fprintf(stderr, "End of stream reached, nothing to follow\n");
            return 0;
        }

        // got < 0: errno belongs to the read() that has just failed
        if(errno != EAGAIN && errno != EWOULDBLOCK) {
            perror("Error while read()ing");
            return 0;
        }

        fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) & ~O_NONBLOCK);
    }

    return find_next_packet(fd);
}

/* Plain sleep: converts the request REQ (seconds and microseconds) into a
struct timespec and hands it to nanosleep(). SKEW is not used; the parameter
exists so that the signature matches usleep_ovcorr().

Returns whatever nanosleep() returns. */
static int usleep_std(struct timeval *req, long *skew) {
    struct timespec ts = {0};

    (void)skew;
    ts.tv_sec  = req->tv_sec;
    ts.tv_nsec = req->tv_usec * 1000; // microseconds to nanoseconds
    return nanosleep(&ts, NULL);
}

/* Sleeps for the time given in REQ while compensating for the overhead of
nanosleep(). *SKEW carries the accumulated difference (in microseconds)
between the time actually spent and the time asked for across calls; REQ may
be modified.

Returns 0 if the pause was skipped, otherwise the result of nanosleep(). */
static int usleep_ovcorr(struct timeval *req, long *skew) {
    /* The manual page for nanosleep(2) says this under the "BUGS" section:

      >The current implementation of nanosleep is  based  on  the
      >normal  kernel  timer mechanism, which has a resolution of
      >1/HZ s (i.e, 10 ms on Linux/i386 and 1 ms on Linux/Alpha).

    (*) for i386/2.4: HZ=100, i.e. 10 ms (10000 us)
        for  all/2.6: HZ=1000, i.e. 1 ms (1000 us)

      >Therefore, nanosleep pauses always for at least the speci-
      >fied time, however it can take up to  10  ms  longer  than
      >specified  until  the  process becomes runnable again. [...]

      >As some applications  require  much  more  precise  pauses
      >(e.g.,  in  order to control some time-critical hardware),
      >nanosleep is also capable of short high-precision  pauses.
      >If  the process is scheduled under a real-time policy like
      >SCHED_FIFO or SCHED_RR, then pauses of up to 2 ms will  be
      >performed as busy waits with microsecond precision.

    Unfortunately, repeated busy waits lock out all other applications, so
    selecting another scheduling policy is unacceptable for ttyreplay, also
    because selecting another policy is only available to the superuser.

    Thus, this function tries to cope with the longer delays by waiting a
    little shorter the next time when the previous call took longer. The
    minimal margin of course still is 1/100 (1/1000) s, which is why the
    algorithm splits up into multiple cases. They are explained below. */

    struct timeval start, stop;
    struct timespec nano_req;
    long long req_usec;
    long dur, over;
    int rv;

    /* The clock is started before the bookkeeping below, so the time spent
    on these calculations ends up in the skew as well -- but only when
    nanosleep() is actually called, not on the early returns. */
    gettimeofday(&start, NULL);
    req_usec = tv2usec(req);

    if(req_usec > Opt.ovcorr) {
        /* If the user requests a delay which is greater than the minimal delay
        (calculated by calc_ovcorr()), we can simply take away some time from
        the request. */

        if(*skew + Opt.ovcorr <= req_usec) {
            /* If the accumulated skew time plus the minimal delay fits into
            the request, the request is reduced and the skew is zeroed. */
            usec2tv(req_usec -= *skew + Opt.ovcorr, req);
            *skew = 0;
        } else {
            /* There is more skew than the current request, so we can take away
            at most the requested time. (In simple language: We already paused
            enough in the past so we can entirely skip this pause.) */
            *skew -= req_usec;
            return 0;
        }
    } else if(*skew >= -Opt.ovcorr) {
        /* The code is the same as the above case, the intent too, but with a
        specialty: The skew can become negative (i.e. we paused too few) up to
        a specific degree (1/HZ s). */
        *skew -= req_usec;
        return 0;
    }

    /* Reached when the request was reduced above, or when the request is
    short but the skew has already dropped below -Opt.ovcorr: nanosleep() is
    called with the (possibly reduced) request. */
    nano_req.tv_sec  = req->tv_sec;
    nano_req.tv_nsec = req->tv_usec * 1000;
    rv = nanosleep(&nano_req, NULL);
    gettimeofday(&stop, NULL);

    /* Calculate the actual duration of nanosleep(), and from that, the
    overhead (actual time minus wanted time) which is added to the skew. */
    dur = MICROSECOND * (stop.tv_sec - start.tv_sec) +
     (stop.tv_usec - start.tv_usec);
    // req_usec already holds the request in microseconds, so reuse it
    over = dur - req_usec;
    *skew += over;

    return rv;
}

/* Calculates the time difference between "past" and "now" and stores the
result in "dest" (seconds plus microseconds, with 0 <= tv_usec < 1 s for
normalized inputs).

If "now" lies before "past" (time stamps out of order), the difference is
clamped to zero instead of wrapping around to an enormous or negative
delay. */
static void tv_delta(const struct timeval *past, const struct timeval *now,
 struct timeval *dest) {
    long acc;

    if(now->tv_sec < past->tv_sec ||
     (now->tv_sec == past->tv_sec && now->tv_usec < past->tv_usec)) {
        dest->tv_sec  = 0;
        dest->tv_usec = 0;
        return;
    }

    dest->tv_sec = now->tv_sec - past->tv_sec;
    acc = now->tv_usec - past->tv_usec;

    if(acc < 0) {
        // past: 1.5, now: 2.0, sec = 2 - 1 = 1, acc = 0 - 500000 = -500000;
        // borrow one second so that the microsecond part is positive again
        --dest->tv_sec;
        acc += MICROSECOND;
    }
    dest->tv_usec = acc;

    return;
}

/* Converts the seconds/microseconds pair in REQ into a single microsecond
count. The seconds are widened before the multiplication so that the product
is not computed in the (possibly 32-bit) width of tv_sec. */
inline static unsigned long long tv2usec(const struct timeval *req) {
    return (unsigned long long)req->tv_sec * MICROSECOND + req->tv_usec;
}

/* Splits the microsecond count USEC into whole seconds and the remaining
microseconds and stores both in REQ. */
inline static void usec2tv(unsigned long long usec, struct timeval *req) {
    const unsigned long long whole = usec / MICROSECOND;

    req->tv_usec = usec - whole * MICROSECOND;
    req->tv_sec  = whole;
}

//==[ End of file ]============================================================
