In Python, handling CSV files is straightforward using the built-in
#python #csv #pandas #datahandling #fileio #interviewtips
π @DataScience4
csv module for reading and writing tabular data, or pandas for advanced analysis—essential for data-processing tasks like importing/exporting datasets in interviews.
# Reading CSV with csv module (basic)
import csv
# Read every row of data.csv into memory as a list of lists of strings.
# The file is opened with newline='' as the csv module documentation requires,
# so that newlines embedded inside quoted fields are parsed correctly.
with open('data.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    # Consume the reader while the file is still open; csv.reader yields each
    # row as a list of strings (numbers are NOT converted).
    data = list(reader)  # e.g. [['Name', 'Age'], ['Alice', '30'], ['Bob', '25']]
# Writing CSV with csv module
import csv
# Write a header plus two data rows to output.csv.
# newline='' stops the text layer from translating the '\r\n' line endings
# that csv.writer emits itself (otherwise Windows gets blank lines).
with open('output.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Name', 'Age'])  # Header
    writer.writerows([['Alice', 30], ['Bob', 25]])  # Data rows
# Advanced: Reading with pandas (handles headers, missing values)
import pandas as pd
# Load the CSV into a DataFrame; the first line of the file becomes the
# column names (e.g. 'Name', 'Age').
df = pd.read_csv('data.csv')

# Preview the first rows (head() returns 5 rows by default).
preview = df.head()
print(preview)

# Writing with pandas: index=False leaves the row index out of the file.
df.to_csv('output.csv', index=False)
#python #csv #pandas #datahandling #fileio #interviewtips
π @DataScience4
Sepp Hochreiter, who invented LSTM 30+ years ago, gave a keynote talk at NeurIPS 2024 and introduced xLSTM (Extended Long Short-Term Memory).
I designed this Excel exercise to help you understand how xLSTM works.
More: https://www.byhand.ai/p/xlstm
I designed this Excel exercise to help you understand how xLSTM works.
More: https://www.byhand.ai/p/xlstm
In Python, image processing unlocks powerful capabilities for computer vision, data augmentation, and automation—master these techniques to excel in ML engineering interviews and real-world applications!
# PIL/Pillow Basics - The essential image library
from PIL import Image
# Load the source picture and show it in the system's image viewer.
img = Image.open("input.jpg")
img.show()

# Format conversion: save() picks the output format from the file extension.
img.save("output.png")
grayscale = img.convert("L")  # RGB to grayscale ("L" = 8-bit luminance)
grayscale.save("grayscale.jpg")

# Basic transformations: each call returns a new image and leaves `img` as is.
rotated = img.rotate(90)
rotated.save("rotated.jpg")

resized = img.resize((300, 300))
resized.save("resized.jpg")

mirrored = img.transpose(Image.FLIP_LEFT_RIGHT)
mirrored.save("mirrored.jpg")
# Advanced Manipulation - Professional editing
from PIL import ImageEnhance, ImageFilter
# Brightness: a factor of 1.0 returns the image unchanged, so 1.5 is 50% brighter.
enhancer = ImageEnhance.Brightness(img)
bright_img = enhancer.enhance(1.5)

# Built-in convolution filters, applied to the same source image in turn.
blurred, sharpened, edges = (
    img.filter(effect)
    for effect in (ImageFilter.BLUR, ImageFilter.SHARPEN, ImageFilter.FIND_EDGES)
)

# Colour: a factor of 2.0 doubles the saturation.
color_enhancer = ImageEnhance.Color(img)
vibrant = color_enhancer.enhance(2.0)
vibrant.save("vibrant.jpg")
# OpenCV Integration - Computer vision powerhouse
import cv2
import numpy as np
# Read the image and convert color spaces. OpenCV loads pixels in BGR order,
# hence the COLOR_BGR2* conversion codes.
cv_img = cv2.imread("input.jpg")
rgb_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
hsv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2HSV)
gray_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

# Edge detection (Canny algorithm) with hysteresis thresholds 100 and 200.
edges = cv2.Canny(cv_img, 100, 200)
cv2.imwrite("edges.jpg", edges)

# Face detection (interview favorite)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# Haar cascades work on intensity, so detect on the grayscale image derived
# from the BGR source. Feeding the RGB copy would make any internal
# BGR->gray conversion weight the red and blue channels the wrong way round.
# Arguments: scaleFactor=1.3, minNeighbors=5.
faces = face_cascade.detectMultiScale(gray_img, 1.3, 5)
for (x, y, w, h) in faces:
    # Draw a 2-px box around each detection; (255, 0, 0) is blue in BGR order.
    cv2.rectangle(cv_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
cv2.imwrite("faces.jpg", cv_img)
# Batch Processing - Production automation
import os
from PIL import Image
def process_images(input_dir, output_dir, watermark_path="watermark.png", max_size=(800, 800)):
    """Shrink and watermark every PNG/JPEG in *input_dir*, saving to *output_dir*.

    Args:
        input_dir: Directory scanned (non-recursively) for files ending in
            .png, .jpg or .jpeg (case-insensitive).
        output_dir: Destination directory; created if it does not exist.
            Each result keeps its original file name.
        watermark_path: Image pasted into the bottom-right corner of every
            picture. Defaults to "watermark.png".
        max_size: (width, height) bounding box handed to ``Image.thumbnail``,
            which shrinks in place while keeping the aspect ratio.
            Defaults to (800, 800).
    """
    os.makedirs(output_dir, exist_ok=True)

    # The watermark is loaded once, on first use, instead of being reopened
    # (and left unclosed) for every file. Loading lazily keeps a run over a
    # directory with no matching images from touching the watermark at all.
    watermark = None

    for filename in os.listdir(input_dir):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        if watermark is None:
            with Image.open(watermark_path) as watermark_file:
                # paste() uses the watermark as its own transparency mask,
                # which needs an alpha channel; RGBA guarantees one.
                watermark = watermark_file.convert("RGBA")

        with Image.open(os.path.join(input_dir, filename)) as img:
            # Resize while maintaining aspect ratio
            img.thumbnail(max_size)
            # Apply watermark, anchored to the bottom-right corner
            position = (img.width - watermark.width, img.height - watermark.height)
            img.paste(watermark, position, watermark)
            img.save(os.path.join(output_dir, filename))
# Example run: process the images found in "raw_photos" and write the results to "processed".
process_images("raw_photos", "processed")
# Image Augmentation - Deep learning preparation
from torchvision import transforms
# Augmentation steps; Compose applies them in list order.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomRotation(15),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(augmentation_steps)

# Apply to dataset
augmented_img = transform(img)
# EXIF Data Handling - Privacy/security critical
from PIL import Image
img = Image.open("photo_with_gps.jpg")

# Strip metadata (security interview question): copy only the pixel values
# into a brand-new image so nothing attached to the original is carried over,
# and save with an explicitly empty EXIF block.
data = list(img.getdata())
clean_img = Image.new(img.mode, img.size)
clean_img.putdata(data)
clean_img.save("clean.jpg", "JPEG", exif=b"")

# Read specific metadata
exif = img.getexif()
# DateTimeOriginal (tag 36867) is stored in the Exif sub-IFD (pointer tag
# 0x8769), not in the top-level IFD that getexif() exposes directly. Look
# there first and fall back to the top level.
exif_ifd = exif.get_ifd(0x8769)
date_taken = exif_ifd.get(36867, exif.get(36867))
if date_taken is not None:  # DateTimeOriginal
    print(date_taken)
# Image Segmentation - Advanced computer vision
import numpy as np
import cv2
# Load the image and build an inverted binary mask: pixels darker than or
# equal to 180 become 255 (foreground), brighter ones become 0.
img = cv2.imread('input.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

# Morphological operations: one dilation pass with a 2x2 kernel.
kernel = np.ones((2, 2), np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations=1)

# Find contours (outermost contours only, compressed point lists).
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    area = cv2.contourArea(cnt)
    if area > 100:  # Filter small contours
        x, y, w, h = cv2.boundingRect(cnt)
        # Outline each kept region with a 2-px green box (BGR order).
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("segmented.jpg", img)