Transforms¶
A transform is any change you apply to your data to make it cleaner, more meaningful, or more suitable for machine learning.
df['Marks'] = df['Marks'].astype(float)
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# A Pipeline runs its steps in order: every step before the last is a
# transform applied to the data, and the last step is the estimator.
pipeline = Pipeline([
    ('scale', StandardScaler()),        # transform step
    ('model', LogisticRegression()),
])
### Fast Fourier Transform
- The Fast Fourier Transform (FFT) is an algorithm that efficiently computes the Discrete Fourier Transform (DFT) of a signal.
Fourier Transform: The Concept
A Fourier Transform converts a time-domain signal into a frequency-domain representation.
It tells you what frequencies are present in your signal and their amplitudes.
Example: A sound wave over time → shows which notes (frequencies) are in the sound.
import numpy as np
import matplotlib.pyplot as plt

# Sample signal: 2 sine waves at 5Hz and 20Hz
fs = 100  # Sampling frequency (samples per second)
t = np.arange(0, 1, 1/fs)
signal = np.sin(2*np.pi*5*t) + 0.5*np.sin(2*np.pi*20*t)

# Compute FFT
fft_values = np.fft.fft(signal)
frequencies = np.fft.fftfreq(len(t), 1/fs)

# Plot magnitude spectrum
# Only the first half of the bins is drawn: those are the non-negative
# frequencies, and the second half mirrors them for a real-valued signal.
plt.plot(frequencies[:len(t)//2], np.abs(fft_values)[:len(t)//2])
plt.title("FFT of Signal")
plt.xlabel("Frequency (Hz)")
plt.ylabel("Amplitude")
plt.show()
Dataset¶
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Example: Student marks dataset
# Eight students with one integer mark each; the marks are treated as a
# short 1-D "signal" for the FFT below.
df = pd.DataFrame({
"Student": ["S1","S2","S3","S4","S5","S6","S7","S8"],
"Marks": [75, 85, 60, 90, 70, 80, 65, 95]
})
marks = df["Marks"].values
# Apply FFT
fft_values = np.fft.fft(marks)
frequencies = np.fft.fftfreq(len(marks)) # Normalized frequency (cycles per sample)
plt.figure(figsize=(8,5))
# NOTE: this call fails on the Matplotlib version used for this notebook --
# stem() no longer accepts `use_line_collection` (see the TypeError recorded
# in this cell's output). The next cell repeats the plot without the keyword.
plt.stem(frequencies, np.abs(fft_values), use_line_collection=True)
plt.title("FFT of Student Marks")
plt.xlabel("Frequency (cycles per sample)")
plt.ylabel("Amplitude")
plt.show()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[3], line 16 14 frequencies = np.fft.fftfreq(len(marks)) # Normalized frequency (cycles per sample) 15 plt.figure(figsize=(8,5)) ---> 16 plt.stem(frequencies, np.abs(fft_values), use_line_collection=True) 17 plt.title("FFT of Student Marks") 18 plt.xlabel("Frequency (cycles per sample)") TypeError: stem() got an unexpected keyword argument 'use_line_collection'
<Figure size 800x500 with 0 Axes>
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Marks for eight students, kept in roster order (S1 .. S8)
df = pd.DataFrame({
    "Student": [f"S{number}" for number in range(1, 9)],
    "Marks": [75, 85, 60, 90, 70, 80, 65, 95],
})
marks = df["Marks"].to_numpy()

# Discrete Fourier transform of the marks, plus the frequency of every bin
fft_values = np.fft.fft(marks)
frequencies = np.fft.fftfreq(marks.size)  # normalized: cycles per sample

# Stem plot of the magnitude in every bin (positive and negative frequencies)
fig, ax = plt.subplots(figsize=(8,5))
ax.stem(frequencies, np.abs(fft_values))
ax.set_title("FFT of Student Marks")
ax.set_xlabel("Frequency (cycles per sample)")
ax.set_ylabel("Amplitude")
plt.show()
# Same spectrum, restricted to the first half of the bins: fftfreq lists the
# non-negative frequencies first, so this drops the mirrored negative half.
N = len(marks)
first_half = slice(0, N//2)
fig, ax = plt.subplots(figsize=(8,5))
ax.stem(frequencies[first_half], np.abs(fft_values)[first_half])
ax.set_title("FFT of Student Marks (Positive Frequencies)")
ax.set_xlabel("Frequency")
ax.set_ylabel("Amplitude")
plt.show()
import pandas as pd
# One record per student: (name, marks, gender)
student_records = [
    ('Pema', 55, 'F'),
    ('Sonam', 72, 'M'),
    ('Karma', 88, 'M'),
    ('Thinley', 45, 'M'),
]
df = pd.DataFrame(student_records, columns=['Name', 'Marks', 'Gender'])
df  # last expression of the cell, so the notebook renders the table
| | Name | Marks | Gender |
|---|---|---|---|
| 0 | Pema | 55 | F |
| 1 | Sonam | 72 | M |
| 2 | Karma | 88 | M |
| 3 | Thinley | 45 | M |
Transform: Convert Data Type¶
Marks from integer to float
# Re-type the Marks column as floating point; other columns are untouched.
marks_as_float = df['Marks'].astype(float)
df['Marks'] = marks_as_float
df  # display the updated table
| | Name | Marks | Gender | Marks_scaled |
|---|---|---|---|---|
| 0 | Pema | 55.0 | F | -0.609145 |
| 1 | Sonam | 72.0 | M | 0.426401 |
| 2 | Karma | 88.0 | M | 1.401033 |
| 3 | Thinley | 45.0 | M | -1.218290 |
Scaling¶
from sklearn.preprocessing import StandardScaler
# Standardize Marks and store the result next to the raw values.
scaler = StandardScaler()
marks_column = df[['Marks']]  # double brackets: a one-column 2-D selection
scaler.fit(marks_column)
df['Marks_scaled'] = scaler.transform(marks_column)
df  # display the table with the new column
| | Name | Marks | Gender | Marks_scaled |
|---|---|---|---|---|
| 0 | Pema | 55.0 | F | -0.609145 |
| 1 | Sonam | 72.0 | M | 0.426401 |
| 2 | Karma | 88.0 | M | 1.401033 |
| 3 | Thinley | 45.0 | M | -1.218290 |
# =========================================================
# MNIST IMAGE FFT VISUALIZATION - ALL IN ONE
# =========================================================
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
# -------------------------------
# 1. Load MNIST dataset
# -------------------------------
# Only the training split is kept; the test split is discarded via (_, _).
(X_train, y_train), (_, _) = mnist.load_data()
img = X_train[0] # First image
label = y_train[0]
# -------------------------------
# 2. Compute 2D FFT
# -------------------------------
fft_img = np.fft.fft2(img)
fft_shifted = np.fft.fftshift(fft_img) # center zero frequency
# Log scale for display; the +1 keeps the logarithm finite where the
# magnitude is zero.
magnitude_spectrum = np.log(np.abs(fft_shifted) + 1)
# -------------------------------
# 3. Low-Pass and High-Pass Filters
# -------------------------------
rows, cols = img.shape
# Centre of the shifted spectrum, where fftshift placed the zero frequency.
crow, ccol = rows//2, cols//2
# Low-pass filter mask (center 20x20)
mask_low = np.zeros((rows, cols))
# NOTE(review): the slice covers offsets -10..+9 around the centre, so the
# mask is not symmetric about the zero-frequency bin -- confirm this is intended.
mask_low[crow-10:crow+10, ccol-10:ccol+10] = 1
# High-pass filter mask (remove center)
# Exact complement of the low-pass mask: 0 inside the square, 1 outside.
mask_high = 1 - mask_low
# Apply masks
fft_low = fft_shifted * mask_low
fft_high = fft_shifted * mask_high
# Reconstruct images
# ifftshift undoes the centring before the inverse transform; .real keeps
# only the real part of the result.
img_low = np.fft.ifft2(np.fft.ifftshift(fft_low)).real
img_high = np.fft.ifft2(np.fft.ifftshift(fft_high)).real
# -------------------------------
# 4. Plot all images side-by-side
# -------------------------------
plt.figure(figsize=(16,4))
# Original image
plt.subplot(1,4,1)
plt.imshow(img, cmap='gray')
plt.title(f"Original Image - Label {label}")
plt.axis('off')
# FFT Magnitude Spectrum
plt.subplot(1,4,2)
plt.imshow(magnitude_spectrum, cmap='gray')
plt.title("FFT Magnitude Spectrum")
plt.axis('off')
# Low-pass filtered
plt.subplot(1,4,3)
pl
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[7], line 7 5 import numpy as np 6 import matplotlib.pyplot as plt ----> 7 from tensorflow.keras.datasets import mnist 9 # ------------------------------- 10 # 1. Load MNIST dataset 11 # ------------------------------- 12 (X_train, y_train), (_, _) = mnist.load_data() ModuleNotFoundError: No module named 'tensorflow'
Animated Spectrogram Example¶
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
# Create a sample signal: 5 seconds, 2 Hz and 5 Hz components
fs = 1000  # Sampling frequency in Hz
# endpoint=False keeps the spacing between samples at exactly 1/fs.
t = np.linspace(0, 5, 5*fs, endpoint=False)
signal = np.sin(2*np.pi*2*t) + 0.5*np.sin(2*np.pi*5*t)

# Compute spectrogram
# The default 256-sample window gives fs/256 (about 3.9 Hz) per frequency bin,
# which cannot separate a 2 Hz tone from a 5 Hz tone. A 2-second window gives
# 0.5 Hz bins; a 0.1-second hop keeps enough time steps to draw.
nperseg = 2 * fs
noverlap = nperseg - fs // 10
frequencies, times, Sxx = spectrogram(signal, fs, nperseg=nperseg, noverlap=noverlap)

# Plot spectrogram
# Clamp the power away from zero so log10 never returns -inf.
power_db = 10 * np.log10(np.maximum(Sxx, np.finfo(float).tiny))
plt.figure(figsize=(10, 6))
plt.pcolormesh(times, frequencies, power_db, shading='gouraud')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')
plt.title('Spectrogram of Synthetic Signal')
plt.colorbar(label='Power/Frequency (dB/Hz)')
# Both tones sit below 10 Hz; without this the axis runs up to fs/2 = 500 Hz.
plt.ylim(0, 10)
plt.show()
DTMF¶
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write
from scipy.signal import spectrogram
# Sampling parameters
fs = 8000       # Sampling frequency (samples per second)
duration = 0.5  # Duration of each tone (seconds)

# DTMF frequency mapping: key -> (low tone Hz, high tone Hz).
# Keys in the same keypad row share the low tone; keys in the same column
# share the high tone.
dtmf_freqs = {
    '1': (697, 1209),
    '2': (697, 1336),
    '3': (697, 1477),
    '4': (770, 1209),
    '5': (770, 1336),
    '6': (770, 1477),
    '7': (852, 1209),
    '8': (852, 1336),
    '9': (852, 1477),
    '*': (941, 1209),
    '0': (941, 1336),
    '#': (941, 1477),
}


def generate_dtmf(key):
    """Return the DTMF waveform for one keypad symbol.

    The tone is the sum of two unit-amplitude sine waves at the frequencies
    listed for ``key`` in ``dtmf_freqs``, sampled at ``fs`` for ``duration``
    seconds (both read from module scope when the function is called).

    Args:
        key: One of the symbols in ``dtmf_freqs`` ('0'-'9', '*', '#').

    Returns:
        1-D float array of ``int(fs * duration)`` samples with values in
        [-2, 2].

    Raises:
        KeyError: If ``key`` is not a DTMF symbol.
    """
    # endpoint=False keeps the sample spacing at exactly duration/N, so
    # consecutive tones can be concatenated without a repeated time point.
    t = np.linspace(0, duration, int(fs*duration), endpoint=False)
    f1, f2 = dtmf_freqs[key]
    tone = np.sin(2*np.pi*f1*t) + np.sin(2*np.pi*f2*t)
    return tone
# Example: tones for the keys '1', '2', '3', '#', played back to back
sequence = '123#'
signal = np.concatenate(list(map(generate_dtmf, sequence)))

# Scale so the loudest sample has magnitude 1
signal = signal / np.abs(signal).max()

# Store as 16-bit PCM: full scale maps to +/-32767
pcm_samples = (signal * 32767).astype(np.int16)
write('dtmf_sequence.wav', fs, pcm_samples)

# Spectrogram of the whole sequence, zoomed to the band holding the DTMF tones
frequencies, times, Sxx = spectrogram(signal, fs)
power_db = 10*np.log10(Sxx)
plt.figure(figsize=(10,6))
plt.pcolormesh(times, frequencies, power_db, shading='gouraud')
plt.title('Spectrogram of DTMF Sequence')
plt.xlabel('Time [s]')
plt.ylabel('Frequency [Hz]')
plt.colorbar(label='Power/Frequency (dB/Hz)')
plt.ylim(500, 1600)
plt.show()
Explanation (animated spectrogram example)
Signal creation:
First 2.5 seconds → 2 Hz sine wave.
After 2.5 seconds → 5 Hz sine wave.
This simulates a frequency appearing over time.
Spectrogram computation:
spectrogram(signal[:frame], fs) computes the spectrogram for the signal up to the current frame.
Animation:
FuncAnimation updates the plot frame by frame, showing how the spectrogram changes as the new frequency appears.
Result:
You’ll see the 2 Hz line first, then the 5 Hz line appears halfway, which visually demonstrates the power of spectrograms.
Cell In[12], line 5 First 2.5 seconds → 2 Hz sine wave. ^ SyntaxError: invalid character '→' (U+2192)
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
# DTMF frequency mapping: (low tone Hz, high tone Hz) -> keypad symbol
dtmf_freqs = {
    (697, 1209): '1', (697, 1336): '2', (697, 1477): '3',
    (770, 1209): '4', (770, 1336): '5', (770, 1477): '6',
    (852, 1209): '7', (852, 1336): '8', (852, 1477): '9',
    (941, 1209): '*', (941, 1336): '0', (941, 1477): '#',
}
low_freqs = np.array([697, 770, 852, 941])
high_freqs = np.array([1209, 1336, 1477])


def _strongest_tone(magnitudes, bin_freqs, candidates, tolerance_hz=10):
    """Return the candidate frequency with the largest spectral peak nearby.

    Each candidate is scored by the maximum magnitude within
    ``tolerance_hz`` of it. Comparing magnitudes, rather than positions
    inside the masked band, is what lets the four row tones (or three
    column tones) be told apart.
    """
    peak_levels = []
    for tone_hz in candidates:
        in_band = (bin_freqs >= tone_hz - tolerance_hz) & (bin_freqs <= tone_hz + tolerance_hz)
        # A band with no FFT bin in it (very short chunk) scores zero.
        peak_levels.append(magnitudes[in_band].max() if in_band.any() else 0.0)
    # Plain int so the result matches the integer keys of dtmf_freqs.
    return int(candidates[int(np.argmax(peak_levels))])


def decode_dtmf(audio, fs, chunk_seconds=0.5):
    """Decode a mono DTMF recording into its keypad symbols.

    The signal is cut into consecutive chunks of ``chunk_seconds`` and each
    chunk is assumed to hold exactly one key press; a trailing partial
    chunk is ignored.

    Args:
        audio: 1-D array of samples (integer PCM or float).
        fs: Sampling frequency in Hz.
        chunk_seconds: Length of one key press in seconds.

    Returns:
        List of detected symbols, one per full chunk (empty if ``audio`` is
        shorter than one chunk).

    Raises:
        ValueError: If a chunk would contain no samples.
    """
    chunk_size = int(chunk_seconds * fs)
    if chunk_size < 1:
        raise ValueError("chunk_seconds * fs must cover at least one sample")

    # Every chunk has the same length, so the frequency axis is shared.
    bin_freqs = np.fft.rfftfreq(chunk_size, 1 / fs)

    detected = []
    for start in range(0, len(audio) - chunk_size + 1, chunk_size):
        chunk = audio[start:start + chunk_size]
        # rfft returns only the non-negative frequencies of a real signal.
        magnitudes = np.abs(np.fft.rfft(chunk))
        low_peak = _strongest_tone(magnitudes, bin_freqs, low_freqs)
        high_peak = _strongest_tone(magnitudes, bin_freqs, high_freqs)
        detected.append(dtmf_freqs[(low_peak, high_peak)])
    return detected


# Runs when executed as a script or as a notebook cell (where __name__ is
# "__main__"), but not when this code is imported, so decode_dtmf can be
# reused without needing the WAV file.
if __name__ == "__main__":
    # Read the audio file
    fs, audio = wavfile.read("dtmf_sequence.wav")

    # If stereo, convert to mono
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    # Split signal into chunks (assuming ~0.5s per key) and decode each one
    detected_keys = decode_dtmf(audio, fs, chunk_seconds=0.5)
    print("Detected DTMF sequence:", "".join(detected_keys))

    # Optional: plot spectrogram for visualization
    from scipy.signal import spectrogram
    frequencies, times, Sxx = spectrogram(audio, fs)
    plt.figure(figsize=(10,6))
    plt.pcolormesh(times, frequencies, 10*np.log10(Sxx), shading='gouraud')
    plt.title('Spectrogram of DTMF Audio')
    plt.xlabel('Time [s]')
    plt.ylabel('Frequency [Hz]')
    plt.colorbar(label='Power/Frequency (dB/Hz)')
    plt.ylim(500, 1600)
    plt.show()
Detected DTMF sequence: 1111