Make fft work, add sample rate to stabilize waveform

This adds a sliding-window buffer approach to the audio sample data and forces the user to set the sampling rate of the input data. This was needed to stabilize the FFT output to a usable degree. Also, the FFT output is now in dB, which makes it a lot better to look at. Signed-off-by: tyrolyean <tyrolyean@tyrolyean.net>
2022-07-20 23:43:41 +02:00 · 2022-07-20 23:43:41 +02:00 · 664f8d0305
commit 664f8d0305
parent b2fa322e30
2 changed files with 95 additions and 27 deletions
--- a/shadermeh.c
+++ b/shadermeh.c
@ -21,47 +21,101 @@ static GLfloat vertex_buffer[] = {
 };

 static GLubyte audio_buffer[AUDIO_SAMPLES * AUDIO_CHANNELS];
-static float audio_sample_data[AUDIO_SAMPLES];
-static fftw_complex fftw_in[AUDIO_SAMPLES];
-static fftw_complex fftw_out[AUDIO_SAMPLES];
+size_t sample_pointer = 0;
+size_t sample_data_pointer = 0;
+size_t sample_rate = 0;
+static float *audio_sample_data;
+static float *audio_receive_data;
+static double *fftw_in;
+static fftw_complex *fftw_out;
 static fftw_plan plan;

-static int try_fetch_audio(void)
+static int try_fetch_audio(float iTimeDelta)
 {
-	size_t i, count = 0;
-	int ret;
+	/* To avoid generating stale images, we keep our own sample buffer,
+	 * which is then used to move a sliding window of data for the fft and
+	 * wave samples. We need to do this, as otherwise we would set an upper
+	 * limit of fps (20 at 4800kHz sample rate), which would not be good.
+	 * The size of the window is set in the header file. The drawback of our
+	 * approach is that the buffer allows for drifting to occur within the
+	 * buffer limits. If your buffer is 3s long the delay can grow to 3s.
+	 * Choose your buffer size wisely for your application.
+	 */
+	size_t i;
+	ssize_t ret = 0;
+	memset(audio_receive_data, 0, AUDIO_BUFFER_SIZE * 
+		sizeof(*audio_receive_data));
+	sample_pointer += (sample_rate * iTimeDelta);

 	for (;;) {
-		ret = read(STDIN_FILENO,
-			   (char *)audio_sample_data + count,
-			   sizeof(audio_sample_data) - count);
+		ret = read(STDIN_FILENO, (char *)audio_receive_data,
+			sizeof(*audio_receive_data)*AUDIO_BUFFER_SIZE);

 		if (ret < 0) {
 			if (errno == EINTR)
 				continue;
-			if (errno == EAGAIN)
+			if (errno == EAGAIN || errno == EWOULDBLOCK)
 				break;
 			perror("stdin");
 			return -1;
 		}

-		if (ret == 0)
+		if (ret == 0 || ret % sizeof(float) != 0){
 			break;
+		}

-		count += ret;
+		ret /= 4;
+		if((ret + sample_pointer) > AUDIO_BUFFER_SIZE){
+			/* Not enough storage space to store all new audio data;
+			 * overwrite the not-yet-output data with the new data */
+			 memset(audio_sample_data, 0, 
+				AUDIO_BUFFER_SIZE * sizeof(*audio_sample_data));
+			 memcpy(audio_sample_data, audio_receive_data, 
+				ret * sizeof(*audio_sample_data));
+			 sample_pointer = 0;
+			 sample_data_pointer = ret;
+		}else{
+			memmove(audio_sample_data,
+				&audio_sample_data[sample_pointer], 
+				(AUDIO_BUFFER_SIZE - sample_pointer)*
+				sizeof(*audio_sample_data));
+			if(sample_data_pointer <= sample_pointer){
+				sample_data_pointer = 0;
+
+			}else{
+				sample_data_pointer -= sample_pointer;
+
+			}
+			sample_pointer = 0;
+			size_t len = ret;
+			if((ret + sample_data_pointer) >= AUDIO_BUFFER_SIZE){
+				len = AUDIO_BUFFER_SIZE - sample_data_pointer;
+			}
+			memcpy(&audio_sample_data[sample_data_pointer], 
+				audio_receive_data, len * sizeof(float));
+			sample_data_pointer += len;
+			break;
+		}
+
+	}
+	if((sample_pointer+AUDIO_FFT_SIZE) >= sample_data_pointer){
+		fprintf(stderr, "shadermeh input to slow %zu > %zu! wrapping around!\n", sample_pointer+AUDIO_FFT_SIZE, sample_data_pointer);
+		sample_pointer = 0;
 	}

-	for (i = 0; i < AUDIO_SAMPLES; ++i)
-		fftw_in[i][0] = audio_sample_data[i];
+	memset(fftw_in, 0, sizeof(*fftw_in) * AUDIO_BUFFER_SIZE);
+	memset(fftw_out, 0, sizeof(*fftw_out) * AUDIO_BUFFER_SIZE);
+
+	for (i = 0; i < AUDIO_FFT_SIZE; ++i)
+		fftw_in[i] = audio_sample_data[sample_pointer+i];

 	fftw_execute(plan);

 	for (i = 0; i < AUDIO_SAMPLES; ++i) {
-		float x = fftw_out[i][0], y = fftw_out[i][1];
-		float a = sqrt(x * x + y * y);
+		float a = cabs(fftw_out[i]);

-		audio_buffer[i + AUDIO_SAMPLES] = audio_sample_data[i] * 127.0f	+ 127.0f;
-		audio_buffer[i] = 127.0f + a * 127.0f;
+		audio_buffer[i + AUDIO_SAMPLES] = audio_sample_data[sample_pointer+i] * 127.0f	+ 127.0f;
+		audio_buffer[i] = log(fabsf(a)+1) * 50;
 	}

 	return 0;
@ -159,11 +213,11 @@ static const struct option long_opts[] = {
 	{ "height", required_argument, NULL, 'h' },
 	{ "shader", required_argument, NULL, 's' },
 	{ "to-stdout", no_argument, NULL, 'S' },
-	{ "stdin-audio", no_argument, NULL, 'a' },
+	{ "stdin-audio", required_argument, NULL, 'a' },
 	{ NULL, 0, NULL, 0 },
 };

-static const char *short_opts = "w:h:s:Sa";
+static const char *short_opts = "w:a:h:s:S";

 static const char *usage_str =
 "shadermeh OPTIONS...\n"
@ -174,7 +228,7 @@ static const char *usage_str =
 "  --height, -h <pixels>\n"
 "\n"
 "  --to-stdout, -S              Poop raw RGB24 frames to stdout (blocking)\n"
-"  --stdin-audio, -a            Read raw PCM audio from stdin (non-blocking)\n"
+"  --stdin-audio, -a <sample rate>     Read raw PCM audio from stdin (non-blocking)\n"
 "\n"
 "  --shader, -s <shader file>\n"
 "\n";
@ -188,7 +242,7 @@ int main(int argc, char **argv)
 	void *fb32 = NULL, *fb24 = NULL;
 	const char *shader_file = NULL;
 	GLint major, minor, prog;
-	float iTime, iTimeDelta;
+	float iTime, iTimeDelta = 0;
 	bool have_audio = false;
 	bool to_stdout = false;
 	window *wnd;
@ -218,6 +272,11 @@ int main(int argc, char **argv)
 			break;
 		case 'a':
 			have_audio = true;
+			sample_rate = strtol(optarg, NULL, 10);
+			audio_sample_data = malloc(AUDIO_BUFFER_SIZE * 
+				sizeof(float));
+			audio_receive_data = malloc(AUDIO_BUFFER_SIZE * 
+				sizeof(float));
 			break;
 		default:
 			fputs(usage_str, stderr);
@ -341,8 +400,12 @@ int main(int argc, char **argv)
 	glBindSampler(0, sampler_sound);

 	if (have_audio) {
-		plan = fftw_plan_dft_1d(AUDIO_SAMPLES, fftw_in, fftw_out,
-					FFTW_FORWARD, FFTW_ESTIMATE);
+		fftw_in = fftw_alloc_real(AUDIO_BUFFER_SIZE);
+		fftw_out = fftw_alloc_complex(AUDIO_BUFFER_SIZE);
+		if(fftw_in == NULL || fftw_out == NULL)
+			goto fail_vao;
+		plan = fftw_plan_dft_r2c_1d(AUDIO_BUFFER_SIZE, fftw_in, fftw_out,
+			 FFTW_MEASURE);
 	}

 	/******************** framebuffer object ********************/
@ -377,7 +440,7 @@ int main(int argc, char **argv)
 		glClear(GL_COLOR_BUFFER_BIT);

 		if (have_audio) {
-			if (try_fetch_audio())
+			if (try_fetch_audio(iTimeDelta))
 				break;

 			glBindTexture(GL_TEXTURE_2D, sound_tex);
@ -451,6 +514,8 @@ fail_vao:
 	window_make_current(NULL);
 	free(fb32);
 	free(fb24);
+	fftw_free(fftw_in);
+	fftw_free(fftw_out);
 	window_destroy(wnd);
 	return EXIT_SUCCESS;
 }
--- a/shadermeh.h
+++ b/shadermeh.h
@ -27,10 +27,13 @@
 #include <poll.h>
 #include <time.h>

-#include <fftw3.h>
 #include <math.h>
+#include <complex.h>
+#include <fftw3.h>

-#define AUDIO_SAMPLES (512)
+#define AUDIO_SAMPLES (4096)
+#define AUDIO_BUFFER_SIZE (sample_rate * 3)
+#define AUDIO_FFT_SIZE (AUDIO_SAMPLES * 2)
 #define AUDIO_CHANNELS (2)

 typedef struct {