/* Sonic library
   Copyright 2010
   Bill Cox
   This file is part of the Sonic Library.

   The Sonic Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/*
The Sonic Library implements a new algorithm invented by Bill Cox for the
specific purpose of speeding up speech by high factors at high quality.  It
generates smooth speech at speed up factors as high as 6X, possibly more.  It is
also capable of slowing down speech, and generates high quality results
regardless of the speed up or slow down factor.  For speeding up speech by 2X or
more, the following equation is used:

    newSamples = period/(speed - 1.0)
    scale = 1.0/newSamples;

where period is the current pitch period, determined using AMDF or any other
pitch estimator, and speed is the speedup factor.  If the current position in
the input stream is pointed to by "samples", and the current output stream
position is pointed to by "out", then newSamples number of samples can be
generated with:

    out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;

where t = 0 to newSamples - 1.

For speed factors < 2X, an algorithm similar to PICOLA is used.  The above
algorithm is first used to double the speed of one pitch period.  Then, enough
input is directly copied from the input to the output to achieve the desired
speed up factor, where 1.0 < speed < 2.0.  The amount of data copied is derived:

    speed = (2*period + length)/(period + length)
    speed*length + speed*period = 2*period + length
    length(speed - 1) = 2*period - speed*period
    length = period*(2 - speed)/(speed - 1)

For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
the output twice, and length of input is copied from the input to the output
until the output desired speed is reached.  The length of data copied is:

    length = period*(speed - 0.5)/(1 - speed)

For slow down factors below 0.5, no data is copied, and an algorithm
similar to high speed factors is used.
*/

/* Include guard: without it a second inclusion repeats the sonicStream typedef,
   which compilers older than C11 reject. */
#ifndef SONIC_H_
#define SONIC_H_

#ifdef __cplusplus
extern "C" {
#endif

/* Uncomment this to use sine-wave based overlap add which in theory can improve
   sound quality slightly, at the expense of lots of floating point math. */
/* #define SONIC_USE_SIN */

/* This specifies the range of voice pitches we try to match.
   Note that if we go lower than 65, we could overflow in findPitchInRange */
#define SONIC_MIN_PITCH 65
#define SONIC_MAX_PITCH 400

/* These are used to down-sample some inputs to improve speed */
#define SONIC_AMDF_FREQ 4000

/* Opaque stream handle.  The structure is only forward-declared here, so
   callers can hold and pass a sonicStream but cannot access its fields. */
struct sonicStreamStruct;
typedef struct sonicStreamStruct *sonicStream;

/* For all of the following functions, numChannels is multiplied by numSamples
   to determine the actual number of values read or returned. */

/* Create a sonic stream.  Return NULL only if we are out of memory and cannot
   allocate the stream.  Set numChannels to 1 for mono, and 2 for stereo. */
sonicStream sonicCreateStream(int sampleRate, int numChannels);

/* Destroy the sonic stream. */
void sonicDestroyStream(sonicStream stream);

/* Use this to write floating point data to be sped up or slowed down into the
   stream.  Values must be between -1 and 1.  Return 0 if memory realloc
   failed, otherwise 1 */
int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples);

/* Use this to write 16-bit data to be sped up or slowed down into the stream.
   Return 0 if memory realloc failed, otherwise 1 */
int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples);

/* Use this to write 8-bit unsigned data to be sped up or slowed down into the
   stream.  Return 0 if memory realloc failed, otherwise 1 */
int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples);

/* Use this to read floating point data out of the stream.  Sometimes no data
   will be available, and zero is returned, which is not an error condition.
   NOTE(review): the return value is presumably the number of samples stored in
   "samples", at most maxSamples -- confirm against the implementation. */
int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples);

/* Use this to read 16-bit data out of the stream.  Sometimes no data will
   be available, and zero is returned, which is not an error condition.
   NOTE(review): the return value is presumably the number of samples stored in
   "samples", at most maxSamples -- confirm against the implementation. */
int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples);

/* Use this to read 8-bit unsigned data out of the stream.  Sometimes no data will
   be available, and zero is returned, which is not an error condition.
   NOTE(review): the return value is presumably the number of samples stored in
   "samples", at most maxSamples -- confirm against the implementation. */
int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples);

/* Force the sonic stream to generate output using whatever data it currently
   has.  No extra delay will be added to the output, but flushing in the middle of
   words could introduce distortion.
   NOTE(review): the meaning of the return value is not stated here; it
   presumably follows the write functions (0 on allocation failure, otherwise
   1) -- confirm against the implementation. */
int sonicFlushStream(sonicStream stream);

/* Return the number of samples in the output buffer */
int sonicSamplesAvailable(sonicStream stream);

/* Get the speed of the stream. */
float sonicGetSpeed(sonicStream stream);
/* Set the speed of the stream. */
void sonicSetSpeed(sonicStream stream, float speed);

/* Get the pitch of the stream. */
float sonicGetPitch(sonicStream stream);
/* Set the pitch of the stream. */
void sonicSetPitch(sonicStream stream, float pitch);

/* Get the rate of the stream. */
float sonicGetRate(sonicStream stream);
/* Set the rate of the stream. */
void sonicSetRate(sonicStream stream, float rate);

/* Get the scaling factor of the stream. */
float sonicGetVolume(sonicStream stream);
/* Set the scaling factor of the stream. */
void sonicSetVolume(sonicStream stream, float volume);

/* Get the chord pitch setting. */
int sonicGetChordPitch(sonicStream stream);
/* Set chord pitch mode on or off.  Default is off.  See the documentation
   page for a description of this feature. */
void sonicSetChordPitch(sonicStream stream, int useChordPitch);

/* Get the quality setting. */
int sonicGetQuality(sonicStream stream);
/* Set the "quality".  Default 0 is virtually as good as 1, but very much faster. */
void sonicSetQuality(sonicStream stream, int quality);

/* Get the sample rate of the stream. */
int sonicGetSampleRate(sonicStream stream);
/* Get the number of channels. */
int sonicGetNumChannels(sonicStream stream);

/* This is a non-stream oriented interface to just change the speed of a sound
   sample.  It works in-place on the sample array, so there must be at least
   speed*numSamples available space in the array.  Returns the new number of samples.
   NOTE(review): slowing down (speed < 1.0) produces more samples than were
   passed in, so the required capacity is presumably closer to numSamples/speed
   than to speed*numSamples -- confirm against the implementation before sizing
   buffers from this bound. */
int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch,
    float rate, float volume, int useChordPitch, int sampleRate, int numChannels);

/* This is a non-stream oriented interface to just change the speed of a sound
   sample.  It works in-place on the sample array, so there must be at least
   speed*numSamples available space in the array.  Returns the new number of samples.
   NOTE(review): slowing down (speed < 1.0) produces more samples than were
   passed in, so the required capacity is presumably closer to numSamples/speed
   than to speed*numSamples -- confirm against the implementation before sizing
   buffers from this bound. */
int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch,
    float rate, float volume, int useChordPitch, int sampleRate, int numChannels);

#ifdef __cplusplus
}
#endif

#endif /* SONIC_H_ */