/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * This tool used OpenAL and fftw to compute the spectrogram for an audio
 * stream and print it to the standard output. To preprocess the data, Hamming
 * window function is used. For stereo, the tool only uses the first channel.
 *
 * For identical values of parameters the program is known to provide output
 * which matches the output of MATLAB R2009b's spectrogram() function.
 *
 * The program outputs the raw complex values for the spectrogram in CSV
 * format. See consume_fft() function if you want to do something else with the
 * spectrogram. 
 *
 * For available options, either consult "./alsgram --help" or the source code.
 *
 * Author: Vlad Sukhoy <sukhoy@iastate.edu>
 *
 * The author disclaims copyright to this source code.
 *
 * To use this program you will need:
 * 1. An OpenAL implementation.
 * 2. An ALUT implementation.
 * 4. fftw with major version 3.
 * 3. GNU argp (typically included in libc).
 *
 * To build from shell, invoke
 * $ gcc alsgram.c -lopenal -lalut -lfftw3 -o alsgram
 *
 * This program is known to compile and work on a GNU/Linux system with:
 *  openal-soft 1.11.753 (implementation of OpenAL) and freealut 1.1.0
 *  (implementation of ALUT) and fftw 3.1.2.
 *
 * You might need to modify the code to run on either non-Linux OS or on a
 * processor with an instruction set that is not derived from x86 or to compile
 * it with a non-gcc compiler.
 *
 * For documentation on OpenAL and ALUT, see
 *  http://connect.creativelabs.com/openal/
 * For documentation on fftw, see
 *  http://www.fftw.org/
 */
#include <AL/alut.h>
#include <argp.h>      /* for argp_parse() */
#include <assert.h>
#include <fftw3.h>
#include <math.h>
#include <stdint.h>    /* for intN_t types */
#include <string.h>    /* for memmove() */
#include <stdlib.h>

/* data structure to hold global settings of the program */
static struct {
  const char *input;
  const char *output;
  int printversion;
  int verbose;
  int nfft;
  int window;
  int noverlap;
} settings = { /* input        = */  "/dev/stdin",
	       /* output       = */ "/dev/stdout",
	       /* printversion = */             0,
	       /* verbose      = */             0,
	       /* nfft         = */           256,
	       /* window       = */           256,
	       /* noverlap     = */            -1};

enum { key_nfft = 0x1000, key_window, key_noverlap };

const char *version = "1.0";
static char *about = "alsgram -- the program to compute and output the \
spectrogram for audio data";
static struct argp_option opts[] = {
  {"input", 'i', "PATH", 0, "Path to the input audio file. The default is to \
read audio data from the standard input."},
  {"output", 'o', "PATH", 0, "Path to the output. The default is to write \
the FFT to the standard output."},
  {"version", 'V', 0, 0, "Print version and exit."},
  {"verbose", 'v', 0, 0, "Be verbose."},
  {"nfft", key_nfft, "NUMBER", 0, "Number of frequency bins in FFT. \
The default is 256."},
  {"window", key_window, "NUMBER", 0, "FFT window size. The default is 256."},
  {"noverlap", key_noverlap, "NUMBER", 0, "Amount of overlap between FFT \
windows. The default is one half of the window."},
  {}};

/* callback to GNU argp to process the command line options */
static error_t
parse_opt(int key, char *arg, struct argp_state *state) {
  switch (key) {
  case 'i':
    settings.input = arg;
    break;
  case 'o':
    settings.output = arg;
    break;
  case 'V':
    settings.printversion = 1;
    break;
  case 'v':
    settings.verbose = 1;
    break;
  case key_nfft:
    settings.nfft = atoi(arg);
    break;
  case key_window:
    settings.window = atoi(arg);
    break;
  case key_noverlap:
    settings.noverlap = atoi(arg);
    break;
  default:
    return ARGP_ERR_UNKNOWN;
  };
  return 0;
}

static void
report_alut_error() {
  fprintf(stderr, "ALUT error: %s\n", alutGetErrorString(alutGetError()));
}

/* convert OpenAL audio format to a human-readable string */
static const char*
alfmt2str(ALenum fmt) {
  switch (fmt) {
  case AL_FORMAT_MONO8:
    return "8-bit mono";
  case AL_FORMAT_MONO16:
    return "16-bit mono";
  case AL_FORMAT_STEREO8:
    return "8-bit stereo";
  case AL_FORMAT_STEREO16:
    return "16-bit stereo";
  default:
    assert(0); /* unknown OpenAL format */
    return "UNKNOWN";
  };
}

/* given the number of bytes in the audio and its format, compute the number of
   samples */
static ALsizei
alnsamps(ALsizei nbytes, ALenum fmt) {
  assert(nbytes >= 0);
  switch (fmt) {
  case AL_FORMAT_MONO8:
    return nbytes;
  case AL_FORMAT_MONO16:
  case AL_FORMAT_STEREO8:
    assert(!(nbytes % 2)); /* no dangling bytes */
    return nbytes / 2;
  case AL_FORMAT_STEREO16:
    assert(!(nbytes % 4)); /* no dangling bytes */
    return nbytes / 4;
  default:
    assert(0); /* unknown OpenAL format */
    return 0;
  }
}

/* data structure for one sample that's more uniform than native formats */
struct sample {
  double ch[2]; /* the data is normalized into range [-1, 1] */
};

/* get the next sample from a pointer into the stream.
   The pointer is advanced. */
static inline struct sample
next_sample(ALvoid **p, ALenum fmt) {
  struct sample rv;
  static const union { unsigned char c[8]; double d; }
  nan = {{0, 0, 0, 0, 0, 0, 0xf8, 0x7f}};

  switch (fmt) {
  case AL_FORMAT_MONO8: {
    uint8_t *samp = *p;
    rv.ch[0] = ((*samp++) - 128.0) / 128.0;
    rv.ch[1] = nan.d;
    *p = samp;
  }
    break;
  case AL_FORMAT_MONO16: {
    int16_t *samp = *p;
    rv.ch[0] = (*samp++) / 32768.0;
    rv.ch[1] = nan.d;
    *p = samp;
  }
    break;
  case AL_FORMAT_STEREO8: {
    uint8_t *samp = *p;
    rv.ch[0] = ((*samp++) - 128.0) / 128.0;
    rv.ch[1] = ((*samp++) - 128.0) / 128.0;
    *p = samp;
  }
    break;
  case AL_FORMAT_STEREO16: {
    int16_t *samp = *p;
    rv.ch[0] = (*samp++) / 32768.0;
    rv.ch[1] = (*samp++) / 32768.0;
    *p = samp;
  }
    break;
  default:
    rv.ch[0] = rv.ch[1] = nan.d;
  };
  return rv;
}

/* Computes the Hamming window function for a window of size N */
static inline double hamming(unsigned int n, const unsigned int N) {
  return 0.54 - 0.46*cos(2*M_PI*n/(N - 1));
}

/* Consumes the output of FFT. Replace with your own code to work with it */
static void
consume_fft(fftw_complex *fft_out, int n) {
  int i;
  for (i=0; i<n; ++i) {
    printf("%.4f+%.4f*I%s", fft_out[i][0], fft_out[i][1], 
	   i == (n-1) ? "\n":",");
  } 
}

/* Implements spectrogram machinery for the data in a window:
 * 1. Applies Hamming window function to the raw data.
 * 2. Computes the FFT.
 * 3. Calls consume_fft() to process the result.
 */
static void
process_window(double *win, int winsamps, double *fft_in, fftw_complex *fft_out,
	       fftw_plan *plan) {
  int i;
  for (i=0; i<winsamps; ++i) {
    fft_in[i % settings.nfft] = hamming(i, settings.window)*win[i];
    if (!((i+1) % settings.nfft)) {
      fftw_execute(*plan);
      consume_fft(fft_out, settings.nfft/2 + 1);
    }
  }
  /* if (i % settings.nfft > 0) {
     process leftovers 
    for (i %= settings.nfft; i < settings.nfft; ++i)
      fft_in[i] = 0.0;
    fftw_execute(*plan);
    consume_fft(fft_out, settings.nfft/2 + 1);
    }*/
}

int 
main(int argc, char **argv) {
  struct argp ap = {opts, parse_opt, 0, about};
  argp_parse(&ap, argc, argv, 0, 0, &settings);
  if (settings.printversion) {
    printf("Version: %s\n", version);
    return 0;
  }
  alutInit(&argc, argv);
  ALenum fmt = 0;
  ALsizei nbytes = 0, nsamps;
  ALfloat freq = 0;
  ALvoid *data = alutLoadMemoryFromFile(settings.input, &fmt, &nbytes, &freq);
  if (!data) {
    report_alut_error();
    return 1;
  }
  nsamps = alnsamps(nbytes, fmt);
  if (settings.verbose) {
    printf("Format:\t\t%s\n", alfmt2str(fmt));
    printf("Frequency:\t%.3f kHz\n", freq/1000.0);
    printf("Total samples:\t%d\n", nsamps);
  }
  ALsizei sidx;
  ALvoid *p = data;
  /* compute proper value for the window overlap */
  int noverlap = (settings.noverlap>=0) ? settings.noverlap:(settings.window/2);
  if (noverlap > settings.window) {
    fprintf(stderr, "Invalid value for overlap: %d\n", noverlap);
    return 1;
  }
  double win[settings.window]; /* use gcc's volatile size var on stack */
  /* fftw_malloc() produces nice aligned pointers useful for SIMD in FFT */
  double *fft_in = fftw_malloc(sizeof(double)*settings.nfft);
  fftw_complex *fft_out = fftw_malloc(sizeof(fftw_complex)*(settings.nfft/2+1));
  /* if you are here, you definitely need a plan */
  fftw_plan plan = fftw_plan_dft_r2c_1d(settings.nfft, fft_in, fft_out,
					FFTW_ESTIMATE);
  int winsamps = 0; /* number of samples in the window */
  for (sidx=0; sidx<nsamps; ++sidx) {
    if (winsamps == settings.window) {
      process_window(win, winsamps, fft_in, fft_out, &plan);
      /* move the last noverlap samples in the window to its front */
      memmove(win, &win[settings.window-noverlap], sizeof(double)*noverlap);
      winsamps = noverlap;
    }
    struct sample s = next_sample(&p, fmt);
    win[winsamps++] = s.ch[0];
  }
  /* process leftovers */
  if (winsamps > 0)
    process_window(win, winsamps, fft_in, fft_out, &plan);

  /* cleanup */
  fftw_free(fft_in);
  fftw_free(fft_out);
  free(data);
  alutExit();
  return 0;
}
