Question 3 (Expert):
In Python's asyncio, what is the purpose of the gather() function?
A) To collect garbage from memory
B) To run multiple awaitables concurrently and wait for all to finish
C) To gather system resources for a task
D) To aggregate results from multiple threads
#Python #AsyncIO #Concurrency #AdvancedProgramming
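For reference, a minimal sketch of asyncio.gather() in action (the coroutine names and delays are made up for illustration):
import asyncio

async def fetch(name: str, delay: float) -> str:
    await asyncio.sleep(delay)  # simulate non-blocking I/O
    return f"{name} done"

async def main():
    # gather() runs both awaitables concurrently and waits for all of them;
    # results come back in the order the awaitables were passed in.
    results = await asyncio.gather(fetch("a", 0.2), fetch("b", 0.1))
    print(results)  # ['a done', 'b done']

asyncio.run(main())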
How can you implement a secure, encrypted, and scalable key-value store in Python using cryptography and redis that supports atomic operations, automatic encryption/decryption of data, concurrent access control, and seamless integration with distributed systems? Provide a concise yet comprehensive code example demonstrating advanced features such as AES-GCM encryption, transactional updates, rate limiting, and cluster-aware failover.
import asyncio
import json
import logging
import time
from typing import Any, Dict, List, Optional

import redis.asyncio as redis  # async Redis client (redis-py >= 4.2)
from cryptography.fernet import Fernet

logger = logging.getLogger(__name__)

# Configuration
REDIS_URL = "redis://localhost:6379/0"
# Fernet needs a 32-byte url-safe base64-encoded key; in production, load a
# persistent key from an environment variable or a secrets manager.
SECRET_KEY = Fernet.generate_key()
class SecureKeyValueStore:
    def __init__(self, redis_url: str, secret_key: bytes):
        self.redis_client = redis.from_url(redis_url)
        self.fernet = Fernet(secret_key)
        self._lock = asyncio.Lock()
        self.rate_limit = {}

    async def _encrypt(self, data: Any) -> str:
        """Encrypt data using Fernet."""
        json_data = json.dumps(data).encode('utf-8')
        return self.fernet.encrypt(json_data).decode('utf-8')

    async def _decrypt(self, encrypted_data) -> Any:
        """Decrypt data using Fernet (accepts the raw bytes Redis returns)."""
        try:
            if isinstance(encrypted_data, str):
                encrypted_data = encrypted_data.encode('utf-8')
            decrypted = self.fernet.decrypt(encrypted_data)
            return json.loads(decrypted.decode('utf-8'))
        except Exception as e:
            logger.error(f"Decryption failed: {e}")
            return None

    async def set(self, key: str, value: Any, ttl: int = 300):
        """Set a key-value pair with encryption and TTL."""
        encrypted_value = await self._encrypt(value)
        async with self._lock:
            await self.redis_client.setex(key, ttl, encrypted_value)

    async def get(self, key: str) -> Optional[Any]:
        """Get and decrypt a value."""
        raw_value = await self.redis_client.get(key)
        if raw_value:
            return await self._decrypt(raw_value)
        return None

    async def atomic_transaction(self, operations: List[Dict]):
        """Execute a batch of operations atomically via a Redis pipeline (MULTI/EXEC)."""
        pipe = self.redis_client.pipeline()
        for op in operations:
            if op['type'] == 'set':
                encrypted = await self._encrypt(op['value'])
                pipe.setex(op['key'], op.get('ttl', 300), encrypted)
            elif op['type'] == 'delete':
                pipe.delete(op['key'])
        await pipe.execute()

    async def rate_limited_set(self, key: str, value: Any, rate_limit: int = 10):
        """Rate-limited set operation (sliding 60-second window per key)."""
        now = time.time()
        if key not in self.rate_limit:
            self.rate_limit[key] = []
        # Drop timestamps older than the 60-second window
        self.rate_limit[key] = [t for t in self.rate_limit[key] if t > now - 60]
        if len(self.rate_limit[key]) >= rate_limit:
            raise Exception("Rate limit exceeded")
        self.rate_limit[key].append(now)
        await self.set(key, value)
# Example usage
async def main():
    store = SecureKeyValueStore(REDIS_URL, SECRET_KEY)
    await store.set("user:1", {"name": "John", "age": 30})
    data = await store.get("user:1")
    print(data)
    await store.atomic_transaction([
        {"type": "set", "key": "counter:1", "value": 1},
        {"type": "set", "key": "counter:2", "value": 2}
    ])
    await store.rate_limited_set("api_call", {"count": 1}, rate_limit=5)

# Run the example
asyncio.run(main())
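The class above uses Fernet (AES-128-CBC plus HMAC) rather than the AES-GCM named in the question. A minimal sketch of what AES-GCM helpers could look like with cryptography's AESGCM primitive, assuming a 256-bit key and a 12-byte random nonce stored alongside each ciphertext:
import os
import json
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

key = AESGCM.generate_key(bit_length=256)  # in production, load from a secrets manager
aesgcm = AESGCM(key)

def encrypt_value(value) -> bytes:
    nonce = os.urandom(12)  # unique nonce per encryption
    ciphertext = aesgcm.encrypt(nonce, json.dumps(value).encode("utf-8"), None)
    return nonce + ciphertext  # prepend the nonce so decryption can recover it

def decrypt_value(blob: bytes):
    nonce, ciphertext = blob[:12], blob[12:]
    return json.loads(aesgcm.decrypt(nonce, ciphertext, None))

print(decrypt_value(encrypt_value({"name": "John"})))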
#Python #Security #Encryption #Redis #KeyvalueStore #AtomicOperations #Concurrency #DistributedSystems #Scalability #Cryptography #AsyncIO
By: @DataScienceQ
How can you build a high-performance, fault-tolerant, and scalable web scraping framework in Python using aiohttp, selenium, asyncio, and redis to handle dynamic content, bypass anti-bot measures, and distribute crawling tasks across multiple workers? Provide a concise code example demonstrating advanced features such as rotating proxies, request rate limiting, error recovery, and distributed task queue management.
import asyncio
import random

import aiohttp
import redis.asyncio as redis  # async Redis client (redis-py >= 4.2)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Configuration
REDIS_URL = "redis://localhost:6379/0"
# aiohttp supports plain HTTP proxies; replace these placeholders with real endpoints
PROXIES = ["http://proxy1:8080", "http://proxy2:8080"]
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
class AsyncWebScraper:
    def __init__(self, redis_url: str):
        self.redis_client = redis.from_url(redis_url)
        self.session = None
        self.proxy = None

    async def setup_session(self):
        """Setup aiohttp session with proxy."""
        self.session = aiohttp.ClientSession()

    async def get_with_proxy(self, url: str) -> str:
        """Fetch URL with a randomly rotated proxy."""
        self.proxy = random.choice(PROXIES)
        headers = HEADERS.copy()
        headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        try:
            async with self.session.get(url, headers=headers, proxy=self.proxy) as response:
                return await response.text()
        except Exception as e:
            print(f"Request failed: {e}")
            return None

    def _selenium_fetch(self, url: str) -> str:
        """Blocking Selenium fetch for dynamic (JavaScript-rendered) content."""
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            return driver.page_source
        finally:
            driver.quit()

    async def scrape_with_selenium(self, url: str) -> str:
        """Run the blocking Selenium fetch in a thread so the event loop stays free."""
        return await asyncio.to_thread(self._selenium_fetch, url)

    async def process_task(self, task_id: str, url: str):
        """Process an individual scraping task."""
        # Simple rate limiting between requests
        await asyncio.sleep(random.uniform(1, 3))
        # Try plain HTTP first, fall back to Selenium for dynamic content
        html = await self.get_with_proxy(url)
        if not html:
            html = await self.scrape_with_selenium(url)
        # Store result
        if html:
            await self.redis_client.set(f"result:{task_id}", html)

    async def worker_loop(self):
        """Worker that processes tasks from the Redis queue."""
        while True:
            task = await self.redis_client.brpop("scraping_queue", timeout=5)
            if task:
                # Split only on the first ':' so URLs keep their own colons
                task_id, url = task[1].decode().split(":", 1)
                await self.process_task(task_id, url)
# Example usage
async def main():
    scraper = AsyncWebScraper(REDIS_URL)
    await scraper.setup_session()
    # Add tasks to the queue
    for i in range(5):
        await scraper.redis_client.lpush("scraping_queue", f"{i}:https://example.com")
    # Start a worker
    await scraper.worker_loop()

asyncio.run(main())
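Because every worker just pops tasks from the shared Redis list, scaling out is mostly a matter of starting more worker coroutines (or more processes or machines pointing at the same Redis). A minimal sketch building on the AsyncWebScraper class above; the worker count is an arbitrary illustration:
async def run_workers(num_workers: int = 3):
    scraper = AsyncWebScraper(REDIS_URL)
    await scraper.setup_session()
    try:
        # Each worker_loop() competes for tasks on the same queue
        await asyncio.gather(*(scraper.worker_loop() for _ in range(num_workers)))
    finally:
        await scraper.session.close()

# asyncio.run(run_workers(3))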
#Python #WebScraping #AsyncIO #Selenium #Redis #ProxyRotation #FaultTolerance #DistributedSystems #DynamicContent #RateLimiting #Scalability
By: @DataScienceQ
Question:
How can you use Python’s asyncio and concurrent.futures to efficiently handle both I/O-bound and CPU-bound tasks in a single application, and what are the best practices for structuring such a system?
Answer:
To efficiently handle both I/O-bound (e.g., network requests, file I/O) and CPU-bound (e.g., data processing, math operations) tasks in Python, you should combine asyncio for I/O-bound work and concurrent.futures.ThreadPoolExecutor or ProcessPoolExecutor for CPU-bound tasks. This avoids blocking the event loop and maximizes performance.
Here’s an example:
import asyncio
from concurrent.futures import ProcessPoolExecutor
import aiohttp

# Simulated I/O-bound task (e.g., API call)
async def fetch_url(session, url):
    try:
        async with session.get(url) as response:
            return await response.text()
    except Exception as e:
        return f"Error: {e}"

# Simulated CPU-bound task (e.g., heavy computation)
def cpu_intensive_task(n):
    return sum(i * i for i in range(n))

# Main function using asyncio + a process pool
async def main():
    # I/O-bound tasks with asyncio
    urls = [
        "https://httpbin.org/json",
        "https://httpbin.org/headers",
        "https://httpbin.org/status/200"
    ]
    # Use aiohttp for concurrent HTTP requests
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
        print("I/O-bound results:", results)

    # CPU-bound tasks with ProcessPoolExecutor (bypasses the GIL);
    # run_in_executor keeps the event loop responsive while the work runs
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as executor:
        cpu_tasks = [
            loop.run_in_executor(executor, cpu_intensive_task, 1_000_000)
            for _ in range(3)
        ]
        cpu_results = await asyncio.gather(*cpu_tasks)
        print("CPU-bound results:", cpu_results)

# Run the async main function
if __name__ == "__main__":
    asyncio.run(main())
Explanation:
- asyncio handles I/O-bound tasks asynchronously without blocking the main thread.
- aiohttp is used for efficient HTTP requests.
- ProcessPoolExecutor runs CPU-heavy functions in separate processes (bypassing the GIL).
- Mixing both ensures optimal resource usage: async for I/O, multiprocessing for CPU.
Best practices:
- Use ThreadPoolExecutor for light I/O or blocking code (see the sketch after this list).
- Use ProcessPoolExecutor for CPU-intensive work.
- Avoid mixing async and blocking code directly; always offload CPU tasks.
- Use asyncio.gather() to run multiple coroutines concurrently.
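A minimal sketch of the ThreadPoolExecutor pattern for blocking calls; blocking_read is a made-up placeholder for any blocking function:
import asyncio
from concurrent.futures import ThreadPoolExecutor

def blocking_read(path: str) -> int:
    # A blocking call (file I/O, a legacy SDK, the requests library, etc.)
    with open(path, "rb") as f:
        return len(f.read())

async def main():
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=4) as pool:
        # Offload the blocking call to a thread so the event loop keeps running
        size = await loop.run_in_executor(pool, blocking_read, __file__)
        print("bytes read:", size)

asyncio.run(main())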
#Python #AsyncIO #Concurrency #Multithreading #Multiprocessing #AdvancedPython #Programming #WebDevelopment #Performance
By: @DataScienceQ 🚀
💡 Python Asyncio Tip: Basic async/await for concurrent operations.
import asyncio

async def say_hello():
    await asyncio.sleep(0.1)  # Simulate a non-blocking I/O call
    print("Hello from async!")

async def main():
    await say_hello()

asyncio.run(main())
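Since the tip is about concurrent operations, here is a small extension (a sketch, not part of the original tip) showing that several awaits overlap instead of running back to back:
import asyncio
import time

async def say_hello(name: str):
    await asyncio.sleep(0.1)  # Simulate a non-blocking I/O call
    print(f"Hello from {name}!")

async def main():
    start = time.perf_counter()
    # Three 0.1s sleeps overlap, so the total is roughly 0.1s, not 0.3s
    await asyncio.gather(say_hello("a"), say_hello("b"), say_hello("c"))
    print(f"elapsed: {time.perf_counter() - start:.2f}s")

asyncio.run(main())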
#Python #Asyncio #Concurrency #Programming
---
By: @DataScienceQ ✨