#python #programming #question #advanced #datastructures #datahandling
Write a comprehensive Python program that demonstrates advanced data handling techniques with various data structures:
1. Create and manipulate nested dictionaries representing a database of employees with complex data types.
2. Use JSON to serialize and deserialize a complex data structure containing lists, dictionaries, and custom objects.
3. Implement a class to represent a student with attributes and methods for data manipulation.
4. Use collections.Counter to analyze frequency of items in a dataset.
5. Demonstrate the use of defaultdict for grouping data by categories.
6. Implement a generator function to process large datasets efficiently.
7. Use itertools to create complex combinations and permutations of data.
8. Handle missing data using pandas DataFrames with different strategies (filling, dropping).
9. Convert between different data formats (dictionary, list, DataFrame, JSON).
10. Perform data validation using type hints and Pydantic models.
import json
from collections import Counter, defaultdict
from itertools import combinations, permutations
import pandas as pd
from typing import Dict, List, Any, Optional
from pydantic import BaseModel, Field
import numpy as np
# 1. Create nested dictionary representing employee database
employee_db = {
'employees': [
{
'id': 1,
'name': 'Alice Johnson',
'department': 'Engineering',
'salary': 85000,
'projects': ['Project A', 'Project B'],
'skills': {'Python': 8, 'JavaScript': 6, 'SQL': 7},
'hobbies': ['reading', 'hiking']
},
{
'id': 2,
'name': 'Bob Smith',
'department': 'Marketing',
'salary': 75000,
'projects': ['Project C'],
'skills': {'Photoshop': 9, 'SEO': 8, 'Copywriting': 7},
'hobbies': ['gaming', 'cooking']
},
{
'id': 3,
'name': 'Charlie Brown',
'department': 'Engineering',
'salary': 92000,
'projects': ['Project A', 'Project D'],
'skills': {'Python': 9, 'C++': 7, 'Linux': 8},
'hobbies': ['coding', 'swimming']
}
]
}
# 2. JSON serialization and deserialization
print("JSON Serialization:")
json_data = json.dumps(employee_db, indent=2)
print(json_data)
print("\nJSON Deserialization:")
loaded_data = json.loads(json_data)
print(f"Loaded data type: {type(loaded_data)}")
# 3. Student class with methods
class Student(BaseModel):
name: str
age: int
grades: List[float]
    major: str
    skills: Dict[str, int] = Field(default_factory=dict)  # optional skill -> proficiency mapping
def average_grade(self) -> float:
return sum(self.grades) / len(self.grades)
def is_honors_student(self) -> bool:
return self.average_grade() >= 3.5
    def get_skill_level(self, skill: str) -> Optional[int]:
        return self.skills.get(skill)
# 4. Using Counter to analyze data
print("\nUsing Counter to analyze skills:")
all_skills = []
for emp in employee_db['employees']:
all_skills.extend(emp['skills'].keys())
skill_counter = Counter(all_skills)
print("Skill frequencies:", skill_counter)
# 5. Using defaultdict for grouping data
print("\nUsing defaultdict to group employees by department:")
dept_groups = defaultdict(list)
for emp in employee_db['employees']:
dept_groups[emp['department']].append(emp['name'])
for dept, names in dept_groups.items():
print(f"{dept}: {names}")
# 6. Generator function for processing large datasets
def large_dataset_generator(size: int):
"""Generator that yields numbers from 1 to size"""
for i in range(1, size + 1):
yield i * 2 # Double each number
print("\nUsing generator to process large dataset:")
gen = large_dataset_generator(1000)
print("First 10 values from generator:", [next(gen) for _ in range(10)])
# 7. Using itertools for combinations and permutations
print("\nUsing itertools for combinations and permutations:")
data = ['A', 'B', 'C', 'D']
print("Combinations of 2 elements:", list(combinations(data, 2)))
print("Permutations of 3 elements:", list(permutations(data, 3)))
# 8. Handling missing data with pandas
print("\nHandling missing data with pandas:")
df = pd.DataFrame([
{'name': 'Alice', 'age': 25, 'city': 'New York'},
{'name': 'Bob', 'age': 30, 'city': None},
{'name': 'Charlie', 'age': None, 'city': 'London'}
])
print("Original DataFrame:")
print(df)
# Fill missing values
df_filled = df.fillna({'age': df['age'].median(), 'city': 'Unknown'})
print("\nAfter filling missing values:")
print(df_filled)
# Drop rows with missing values
df_dropped = df.dropna()
print("\nAfter dropping rows with missing values:")
print(df_dropped)
# 9. Converting between data formats
print("\nConverting between data formats:")
# Dictionary to list of tuples
dict_to_list = [(k, v) for k, v in employee_db['employees'][0].items()]
print("Dictionary to list of tuples:", dict_to_list[:5])
# List to DataFrame
df_from_list = pd.DataFrame(employee_db['employees'])
print("\nList to DataFrame:")
print(df_from_list)
# DataFrame to JSON
json_from_df = df_from_list.to_json(orient='records')
print("\nDataFrame to JSON:")
print(json_from_df)
# 10. Data validation with Pydantic
print("\nData validation with Pydantic:")
try:
student1 = Student(
name="David Wilson",
age=22,
grades=[85, 90, 78],
major="Computer Science"
)
print("Valid student:", student1)
print(f"Average grade: {student1.average_grade():.2f}")
print(f"Honors student: {student1.is_honors_student()}")
except Exception as e:
print("Validation error:", str(e))
# Advanced example: filtering and transforming data
print("\nAdvanced data transformation:")
# Filter engineering employees with high salaries
engineering_high_salary = [
emp for emp in employee_db['employees']
if emp['department'] == 'Engineering' and emp['salary'] > 80000
]
print("Engineering employees with salary > $80,000:")
for emp in engineering_high_salary:
print(f"{emp['name']}: ${emp['salary']}")
# Calculate average salary by department
dept_salaries = defaultdict(list)
for emp in employee_db['employees']:
dept_salaries[emp['department']].append(emp['salary'])
avg_dept_salary = {dept: sum(salaries)/len(salaries)
for dept, salaries in dept_salaries.items()}
print("\nAverage salary by department:", avg_dept_salary)
# Complex data analysis using numpy
print("\nComplex data analysis using numpy:")
all_salaries = np.array([emp['salary'] for emp in employee_db['employees']])
print(f"Total employees: {len(all_salaries)}")
print(f"Average salary: ${np.mean(all_salaries):.2f}")
print(f"Median salary: ${np.median(all_salaries):.2f}")
print(f"Standard deviation: ${np.std(all_salaries):.2f}")By: @DataScienceQ 🚀
#python #programming #question #advanced #imageprocessing #opencv
Write a Python program that demonstrates advanced image processing techniques using various libraries and approaches:
1. Load an image from file and display it in multiple formats (RGB, grayscale, HSV).
2. Perform color space conversion between RGB, Grayscale, and HSV.
3. Apply different types of filters (Gaussian, median, bilateral) to reduce noise.
4. Implement edge detection using Canny and Sobel operators.
5. Use morphological operations (erosion, dilation, opening, closing) on binary images.
6. Detect and draw contours in the image.
7. Apply image thresholding (simple, adaptive, Otsu's method).
8. Implement image transformations (rotation, scaling, translation).
9. Use OpenCV's feature detection algorithms (SIFT, ORB) to find keypoints.
10. Save processed images in different formats (JPEG, PNG, TIFF).
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# 1. Load image and display basic information
def load_and_display_info(image_path):
img = cv2.imread(image_path)
if img is None:
raise FileNotFoundError(f"Image not found: {image_path}")
print("Image loaded successfully")
print(f"Shape: {img.shape}")
print(f"Data type: {img.dtype}")
print(f"Dimensions: {len(img.shape)}")
return img
# 2. Display image in different formats
def display_images(images, titles, figsize=(15, 10)):
fig, axes = plt.subplots(2, 4, figsize=figsize)
axes = axes.ravel()
for i, (img, title) in enumerate(zip(images, titles)):
if len(img.shape) == 2: # Grayscale
axes[i].imshow(img, cmap='gray')
else: # Color
axes[i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
axes[i].set_title(title)
axes[i].axis('off')
plt.tight_layout()
plt.show()
# 3. Convert color spaces
def convert_color_spaces(img):
# RGB to Grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# RGB to HSV
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Grayscale to RGB
gray_rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
return gray, hsv, gray_rgb
# 4. Apply different filters
def apply_filters(img):
# Gaussian blur
gaussian = cv2.GaussianBlur(img, (5, 5), 0)
# Median filter
median = cv2.medianBlur(img, 5)
# Bilateral filter
bilateral = cv2.bilateralFilter(img, 9, 75, 75)
return gaussian, median, bilateral
# 5. Edge detection
def edge_detection(img):
# Convert to grayscale first
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Canny edge detection
canny = cv2.Canny(gray, 100, 200)
# Sobel edge detection
sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel = np.sqrt(sobel_x**2 + sobel_y**2)
return canny, sobel
# 6. Morphological operations
def morphological_operations(img):
# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Threshold to create binary image
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Erosion
kernel = np.ones((5, 5), np.uint8)
erosion = cv2.erode(binary, kernel, iterations=1)
# Dilation
dilation = cv2.dilate(binary, kernel, iterations=1)
# Opening (erosion followed by dilation)
opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
return erosion, dilation, opening, closing
# 7. Thresholding methods
def thresholding_methods(img):
# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Simple thresholding
_, thresh_simple = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Adaptive thresholding
thresh_adaptive = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, 11, 2)
# Otsu's thresholding
_, thresh_otsu = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
return thresh_simple, thresh_adaptive, thresh_otsu
# 8. Image transformations
def image_transformations(img):
# Get image dimensions
h, w = img.shape[:2]
# Rotation
rotation_matrix = cv2.getRotationMatrix2D((w/2, h/2), 45, 1.0)
rotated = cv2.warpAffine(img, rotation_matrix, (w, h))
# Scaling
scaled = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
# Translation
tx, ty = 100, 50
translation_matrix = np.float32([[1, 0, tx], [0, 1, ty]])
translated = cv2.warpAffine(img, translation_matrix, (w, h))
return rotated, scaled, translated
# 9. Feature detection
def feature_detection(img):
# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# SIFT feature detection
try:
sift = cv2.SIFT_create()
keypoints_sift, descriptors_sift = sift.detectAndCompute(gray, None)
img_sift = cv2.drawKeypoints(img, keypoints_sift, None, color=(255, 0, 0))
    except (AttributeError, cv2.error):  # SIFT may be unavailable in some OpenCV builds
print("SIFT not available")
img_sift = img.copy()
# ORB feature detection
orb = cv2.ORB_create()
keypoints_orb, descriptors_orb = orb.detectAndCompute(gray, None)
img_orb = cv2.drawKeypoints(img, keypoints_orb, None, color=(0, 0, 255))
return img_sift, img_orb
# 10. Contour detection
def detect_contours(img):
# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Threshold to get binary image
_, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Find contours
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Draw contours on original image
contour_img = img.copy()
cv2.drawContours(contour_img, contours, -1, (0, 255, 0), 2)
return contour_img, len(contours)
# Main function to demonstrate all techniques
def main():
# Load image
image_path = "example.jpg" # Replace with actual image path
try:
img = load_and_display_info(image_path)
except Exception as e:
print(f"Error loading image: {e}")
return
# Create output directory
output_dir = Path("output_images")
output_dir.mkdir(exist_ok=True)
# 1. Display original image
original_images = [img]
original_titles = ["Original Image"]
# 2. Convert color spaces
gray, hsv, gray_rgb = convert_color_spaces(img)
color_conversion_images = [gray, hsv, gray_rgb]
color_conversion_titles = ["Grayscale", "HSV", "Grayscale RGB"]
# 3. Apply filters
gaussian, median, bilateral = apply_filters(img)
filter_images = [gaussian, median, bilateral]
filter_titles = ["Gaussian Blur", "Median Filter", "Bilateral Filter"]
# 4. Edge detection
canny, sobel = edge_detection(img)
edge_images = [canny, sobel]
edge_titles = ["Canny Edges", "Sobel Edges"]
# 5. Morphological operations
erosion, dilation, opening, closing = morphological_operations(img)
morph_images = [erosion, dilation, opening, closing]
morph_titles = ["Erosion", "Dilation", "Opening", "Closing"]
# 6. Thresholding methods
simple, adaptive, otsu = thresholding_methods(img)
threshold_images = [simple, adaptive, otsu]
threshold_titles = ["Simple Threshold", "Adaptive Threshold", "Otsu's Threshold"]
# 7. Image transformations
rotated, scaled, translated = image_transformations(img)
transform_images = [rotated, scaled, translated]
transform_titles = ["Rotated", "Scaled", "Translated"]
# 8. Feature detection
sift_img, orb_img = feature_detection(img)
feature_images = [sift_img, orb_img]
feature_titles = ["SIFT Features", "ORB Features"]
# 9. Contour detection
contour_img, num_contours = detect_contours(img)
contour_images = [contour_img]
contour_titles = [f"Contours ({num_contours} detected)"]
# Combine all images for display
all_images = (original_images + color_conversion_images + filter_images +
edge_images + morph_images + threshold_images +
transform_images + feature_images + contour_images)
all_titles = (original_titles + color_conversion_titles + filter_titles +
edge_titles + morph_titles + threshold_titles +
transform_titles + feature_titles + contour_titles)
# Display all results
display_images(all_images, all_titles)
# Save processed images
cv2.imwrite(str(output_dir / "original.jpg"), img)
cv2.imwrite(str(output_dir / "grayscale.jpg"), gray)
cv2.imwrite(str(output_dir / "hsv.jpg"), hsv)
cv2.imwrite(str(output_dir / "gaussian.jpg"), gaussian)
cv2.imwrite(str(output_dir / "canny.jpg"), canny)
cv2.imwrite(str(output_dir / "contours.jpg"), contour_img)
print(f"All processed images saved to {output_dir}")
if __name__ == "__main__":
main()
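Requirement 10 asks for JPEG, PNG, and TIFF output, while main() above writes only JPEGs. A minimal, hedged helper sketch (the function name is illustrative, and it assumes an image plus the output_dir created in main()): cv2.imwrite infers the codec from the file extension, so one loop covers all three formats, e.g. save_in_multiple_formats(gray, output_dir, "grayscale").
def save_in_multiple_formats(image, out_dir: Path, stem: str) -> None:
    """Write one image as JPEG, PNG, and TIFF; the format is chosen from the extension."""
    for ext in (".jpg", ".png", ".tiff"):
        cv2.imwrite(str(out_dir / f"{stem}{ext}"), image)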
By: @DataScienceQ
#python #programming #question #advanced #osoperations
Write a Python program that demonstrates advanced operating system interactions with the following requirements:
1. List all files and directories in the current directory with detailed information (size, modification time).
2. Create a new directory and move a file into it.
3. Execute a system command to list processes and capture its output.
4. Get the current user's home directory and environment variables.
5. Check if a process is running by name.
6. Set and get environment variables.
7. Create a temporary file and clean it up after use.
import os
import subprocess
import shutil
import tempfile
import psutil
from pathlib import Path
import sys
# 1. List files and directories with detailed information
def list_directory_details():
print("Directory contents with details:")
for entry in os.scandir('.'):
try:
stat = entry.stat()
print(f"{entry.name:<20} {stat.st_size:>8} bytes, "
f"modified: {stat.st_mtime}")
except Exception as e:
print(f"{entry.name}: Error - {e}")
# 2. Create directory and move file
def create_and_move_file():
# Create new directory
new_dir = "test_directory"
os.makedirs(new_dir, exist_ok=True)
# Create a test file
test_file = "test_file.txt"
with open(test_file, 'w') as f:
f.write("This is a test file.")
# Move file to new directory
destination = os.path.join(new_dir, test_file)
shutil.move(test_file, destination)
print(f"Moved {test_file} to {destination}")
# 3. Execute system command and capture output
def execute_system_command():
# List processes using ps command
result = subprocess.run(['ps', '-eo', 'pid,comm'],
capture_output=True, text=True)
print("\nRunning processes:")
print(result.stdout)
# 4. Get user information
def get_user_info():
print(f"\nCurrent user: {os.getlogin()}")
print(f"Home directory: {os.path.expanduser('~')}")
print(f"Current working directory: {os.getcwd()}")
# 5. Check if process is running
def check_process_running(process_name):
for proc in psutil.process_iter(['name']):
try:
if process_name.lower() in proc.info['name'].lower():
return True
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
return False
# 6. Environment variables
def manage_environment_variables():
# Get environment variables
print(f"\nPATH variable: {os.environ.get('PATH')}")
print(f"HOME variable: {os.environ.get('HOME')}")
# Set a new environment variable
os.environ['TEST_VAR'] = 'Hello from Python'
print(f"Set TEST_VAR: {os.environ.get('TEST_VAR')}")
# 7. Temporary files
def use_temporary_files():
# Create temporary file
with tempfile.NamedTemporaryFile(delete=False) as temp:
temp.write(b"This is a temporary file.")
temp_path = temp.name
print(f"Created temporary file: {temp_path}")
# Clean up
os.unlink(temp_path)
print("Temporary file deleted.")
# Main function to demonstrate all techniques
def main():
print("=== Operating System Operations Demo ===\n")
# 1. List directory details
list_directory_details()
# 2. Create directory and move file
create_and_move_file()
# 3. Execute system command
execute_system_command()
# 4. Get user info
get_user_info()
# 5. Check if process is running
print(f"\nIs Python running? {check_process_running('python')}")
# 6. Manage environment variables
manage_environment_variables()
# 7. Use temporary files
use_temporary_files()
print("\nAll operations completed successfully.")
if __name__ == "__main__":
main()
By: @DataScienceQ 🚀
#How can I implement the Quick Sort algorithm to sort an array in ascending order? Provide a Python example, explain the partitioning process, and state the average and worst-case time complexities.
Answer:
Quick Sort uses a divide-and-conquer strategy. It selects a pivot element, partitions the array such that elements less than the pivot are on the left, and greater elements are on the right, then recursively sorts the subarrays.
def quicksort(arr):
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quicksort(left) + middle + quicksort(right)
# Example usage
arr = [3, 6, 8, 10, 1, 2, 1]
print(quicksort(arr)) # Output: [1, 1, 2, 3, 6, 8, 10]
Time Complexity:
- Average: O(n log n)
- Worst case: O(n²) (when the pivot is always the smallest or largest element)
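The list-comprehension version above builds new lists at every level; the partitioning step the question refers to is usually done in place. A minimal sketch of the Lomuto partition scheme (not part of the original answer, shown only to make the partitioning explicit):
def lomuto_partition(arr, low, high):
    pivot = arr[high]            # choose the last element as the pivot
    i = low - 1                  # boundary of the "less than pivot" region
    for j in range(low, high):
        if arr[j] < pivot:
            i += 1
            arr[i], arr[j] = arr[j], arr[i]
    arr[i + 1], arr[high] = arr[high], arr[i + 1]  # put the pivot in its final position
    return i + 1

def quicksort_inplace(arr, low=0, high=None):
    if high is None:
        high = len(arr) - 1
    if low < high:
        p = quicksort_inplace_partition = lomuto_partition(arr, low, high)
        quicksort_inplace(arr, low, p - 1)
        quicksort_inplace(arr, p + 1, high)

nums = [3, 6, 8, 10, 1, 2, 1]
quicksort_inplace(nums)
print(nums)  # [1, 1, 2, 3, 6, 8, 10]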
By: @DataScienceQ 🚀
#How can I implement the Depth-First Search (DFS) algorithm to traverse a graph represented as an adjacency list? Provide a Python example, explain the recursive approach, and discuss its space complexity.
Answer:
DFS explores as far as possible along each branch before backtracking. It uses a stack (explicitly or via recursion) to keep track of nodes to visit.
def dfs(graph, start, visited=None):
if visited is None:
visited = set()
visited.add(start)
print(start, end=' ')
for neighbor in graph[start]:
if neighbor not in visited:
dfs(graph, neighbor, visited)
# Example usage
graph = {
'A': ['B', 'C'],
'B': ['D', 'E'],
'C': ['F'],
'D': [],
'E': ['F'],
'F': []
}
dfs(graph, 'A') # Output: A B D E F C
Space Complexity: O(V) where V is the number of vertices, due to the recursion stack and visited set.
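Since the explanation notes that the stack can also be explicit, here is a minimal iterative sketch (not in the original answer) that reuses the same graph and avoids Python's recursion limit on deep graphs:
def dfs_iterative(graph, start):
    visited = set()
    stack = [start]              # explicit stack instead of the call stack
    order = []
    while stack:
        node = stack.pop()
        if node in visited:
            continue
        visited.add(node)
        order.append(node)
        # push neighbors in reverse so they are visited in the original adjacency order
        for neighbor in reversed(graph[node]):
            if neighbor not in visited:
                stack.append(neighbor)
    return order

print(dfs_iterative(graph, 'A'))  # ['A', 'B', 'D', 'E', 'F', 'C']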
By: @DataScienceQ 🚀
#How can I implement the Dijkstra's shortest path algorithm for a weighted graph using a priority queue? Provide a Python example, explain the greedy approach, and state the time complexity.
Answer:
Dijkstra's algorithm finds the shortest path from a source node to all other nodes in a graph with non-negative edge weights. It uses a priority queue to always expand the closest unvisited node.
import heapq
from collections import defaultdict
import sys
def dijkstra(graph, start):
# Priority queue: (distance, node)
pq = [(0, start)]
distances = {start: 0}
visited = set()
while pq:
current_dist, current_node = heapq.heappop(pq)
if current_node in visited:
continue
visited.add(current_node)
for neighbor, weight in graph[current_node]:
if neighbor not in distances or distances[neighbor] > current_dist + weight:
distances[neighbor] = current_dist + weight
heapq.heappush(pq, (distances[neighbor], neighbor))
return distances
# Example usage
graph = defaultdict(list)
graph['A'] = [('B', 4), ('C', 2)]
graph['B'] = [('C', 1), ('D', 5)]
graph['C'] = [('D', 8)]
graph['D'] = []
distances = dijkstra(graph, 'A')
print(distances)  # Output: {'A': 0, 'B': 4, 'C': 2, 'D': 9}
Time Complexity: O((V + E) log V) where V is the number of vertices and E is the number of edges, due to heap operations.
By: @DataScienceQ 🚀
#How can I implement the Tower of Hanoi problem using recursion? Provide a Python example, explain the recursive logic, and state the time complexity.
Answer:
The Tower of Hanoi is a classic puzzle that involves moving disks from one peg to another following specific rules. The recursive solution breaks the problem into smaller subproblems.
def tower_of_hanoi(n, source, auxiliary, target):
if n == 1:
print(f"Move disk 1 from {source} to {target}")
return
tower_of_hanoi(n - 1, source, target, auxiliary)
print(f"Move disk {n} from {source} to {target}")
tower_of_hanoi(n - 1, auxiliary, source, target)
# Example usage
tower_of_hanoi(3, 'A', 'B', 'C')
Recursive Logic:
To move n disks from source to target:
1. Move n-1 disks from source to auxiliary.
2. Move the largest disk from source to target.
3. Move n-1 disks from auxiliary to target.
Time Complexity: O(2^n), since each call spawns two recursive calls on n-1 disks (2^n - 1 moves in total).
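To make the O(2^n) claim concrete, a small sketch (not in the original answer) counts the moves and checks them against 2^n - 1:
def hanoi_move_count(n: int) -> int:
    if n == 0:
        return 0
    # cost of n disks = move n-1, move the largest disk (1), move n-1 again
    return 2 * hanoi_move_count(n - 1) + 1

for n in range(1, 6):
    print(n, hanoi_move_count(n), 2**n - 1)  # both columns agree: 1, 3, 7, 15, 31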
By: @DataScienceQ 🚀
#How can I implement a basic Convolutional Neural Network (CNN) for image classification using TensorFlow/Keras? Provide a Python example, explain the role of convolutional layers, pooling layers, and fully connected layers, and discuss overfitting prevention techniques.
Answer:
A CNN processes image data by applying filters to detect features like edges, textures, and shapes. It uses convolutional layers to extract features, pooling layers to reduce spatial dimensions, and fully connected layers for classification.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
# Load and preprocess data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train, y_test = to_categorical(y_train), to_categorical(y_test)
# Build CNN model
model = models.Sequential([
layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(10, activation='softmax')
])
# Compile and train
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
# Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc}")
Explanation:
- Conv2D: Applies filters to detect features.
- MaxPooling2D: Reduces dimensionality while preserving important features.
- Flatten: Converts 2D feature maps into 1D vectors.
- Dense layers: Perform classification using learned features.
Overfitting Prevention:
- Use dropout layers (layers.Dropout(0.5)).
- Apply data augmentation (tf.keras.preprocessing.image.ImageDataGenerator).
- Use early stopping (tf.keras.callbacks.EarlyStopping).
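The model above does not apply these techniques itself, so here is a minimal, hedged sketch of how they could be wired into the same Keras setup (hyperparameter values are illustrative; x_train, y_train, x_test, y_test, layers, and models are reused from the code above):
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dropout after the dense layer reduces co-adaptation of features
regularized_model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])
regularized_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation: random shifts and horizontal flips of the training images
datagen = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)

# Early stopping: halt training when validation loss stops improving
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

regularized_model.fit(datagen.flow(x_train, y_train, batch_size=64),
                      epochs=30,
                      validation_data=(x_test, y_test),
                      callbacks=[early_stop])
By: @DataScienceQ 🚀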
#How can I implement a Recurrent Neural Network (RNN) for text classification using TensorFlow/Keras? Provide a Python example, explain the role of recurrent layers in processing sequential data, and discuss challenges like vanishing gradients.
Answer:
An RNN processes sequences by maintaining a hidden state that captures information from previous time steps. It is useful for tasks like text classification where context matters.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Load and preprocess data
vocab_size = 10000
max_length = 250
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)
# Build RNN model
model = models.Sequential([
layers.Embedding(vocab_size, 128, input_length=max_length),
layers.SimpleRNN(64, return_sequences=False),
layers.Dense(32, activation='relu'),
layers.Dense(1, activation='sigmoid')
])
# Compile and train
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))
# Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc}")
Explanation:
- Embedding: Converts words into dense vectors.
- SimpleRNN: Processes the sequence step-by-step, updating hidden state at each step.
- Dense layers: Classify based on final hidden state.
Challenges:
- Vanishing gradients: Long-term dependencies are hard to learn due to gradient decay.
- Solutions: Use LSTM or GRU cells instead of SimpleRNN for better gradient flow.
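As a concrete illustration of that last point, a minimal hedged sketch that swaps the SimpleRNN layer for an LSTM in the same model (all other names and settings reused from the code above):
lstm_model = models.Sequential([
    layers.Embedding(vocab_size, 128, input_length=max_length),
    layers.LSTM(64),                       # LSTM gates mitigate vanishing gradients
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
lstm_model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))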
By: @DataScienceQ 🚀
#How can I implement a Support Vector Machine (SVM) for binary classification using scikit-learn? Provide a Python example, explain the concept of maximizing the margin, and discuss kernel functions for non-linear data.
Answer:
SVM finds the optimal hyperplane that maximizes the margin between two classes. It works well with high-dimensional data and uses kernels to handle non-linear separability.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Load dataset
X, y = datasets.make_classification(n_samples=100, n_features=2, n_redundant=0, n_informative=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train SVM with linear kernel
svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train, y_train)
# Predict and evaluate
y_pred = svm_linear.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
# Plot decision boundary
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', edgecolor='k')
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# Create grid to evaluate model
xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 50),
np.linspace(ylim[0], ylim[1], 50))
Z = svm_linear.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot decision boundary and margins
plt.contour(xx, yy, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
linestyles=['--', '-', '--'])
plt.scatter(svm_linear.support_vectors_[:, 0], svm_linear.support_vectors_[:, 1],
s=100, facecolors='none', edgecolors='k')
plt.title("SVM with Linear Kernel")
plt.show()
Explanation:
- Margin: The distance between the hyperplane and the closest data points (support vectors). SVM maximizes this margin for better generalization.
- Kernel functions: Allow SVM to classify non-linear data by mapping it into higher-dimensional space. Common kernels:
  - linear: for linearly separable data.
  - rbf (Radial Basis Function): for non-linear data.
  - poly: polynomial kernel.
Use Case:
SVM is effective when the number of features is large compared to the number of samples.
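For the non-linear case mentioned above, a minimal hedged sketch that reuses the same train/test split and simply swaps in the RBF kernel (the C and gamma values are illustrative defaults):
svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_rbf.fit(X_train, y_train)
print(f"RBF kernel accuracy: {accuracy_score(y_test, svm_rbf.predict(X_test)):.2f}")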
By: @DataScienceQ 🚀
#Report on Challenges Faced by Girls in Learning Programming and Entering the Tech Workforce, and Proposed Solutions
Introduction:
Despite increasing efforts to promote gender diversity in technology, girls continue to face unique challenges when learning programming and entering the tech workforce. These barriers are often rooted in societal norms, educational disparities, and workplace culture.
---
Challenges Faced by Girls:
1. Societal Stereotypes and Gender Bias
- From a young age, girls are often discouraged from pursuing STEM fields due to the perception that coding is "for boys."
- Media and cultural narratives reinforce the idea that technology is male-dominated, leading to lower confidence and interest among girls.
2. Lack of Role Models and Mentorship
- Few visible female programmers or tech leaders make it difficult for girls to envision themselves in these roles.
- Limited access to female mentors in tech hinders guidance and support during learning and career development.
3. Educational Environment and Classroom Dynamics
- In classrooms, girls may feel excluded or hesitant to participate due to peer pressure or lack of encouragement.
- Teachers sometimes unconsciously favor male students in technical discussions, reducing girls’ engagement.
4. Lower Self-Confidence and Imposter Syndrome
- Even when academically capable, girls often doubt their abilities, especially in male-dominated environments.
- This can lead to early dropout from programming courses or reluctance to apply for tech jobs.
5. Workplace Discrimination and Harassment
- Once in the workforce, women may face gender bias, unequal pay, and microaggressions.
- The lack of inclusive policies and supportive networks can result in high attrition rates among female developers.
6. Limited Access to Resources and Opportunities
- Girls in underprivileged areas may lack access to computers, internet, or quality coding education.
- Extracurricular programs and coding bootcamps are often less accessible or targeted toward males.
---
Proposed Solutions:
1. Early Exposure and Inclusive Education
- Introduce coding in primary schools with gender-neutral curricula and tools (e.g., Scratch, block-based programming).
- Encourage participation through girl-focused coding clubs and competitions.
2. Promote Female Role Models and Mentors
- Highlight successful women in tech through media, school talks, and online platforms.
- Establish mentorship programs connecting girls with experienced female developers.
3. Create Safe and Supportive Learning Environments
- Train educators to recognize and address gender bias in classrooms.
- Foster collaborative learning spaces where all students feel valued.
4. Build Confidence Through Achievement Recognition
- Celebrate small wins and encourage girls to showcase their projects.
- Provide constructive feedback to reduce imposter syndrome.
5. Implement Inclusive Hiring and Workplace Policies
- Companies should adopt blind recruitment, diversity training, and clear anti-harassment policies.
- Offer flexible work arrangements and employee resource groups for women in tech.
6. Expand Access to Technology and Training
- Fund coding programs in underserved communities.
- Partner with NGOs and tech companies to provide free or low-cost resources.
---
Conclusion:
Addressing the challenges faced by girls in programming requires systemic change involving families, educators, policymakers, and industry leaders. By fostering inclusivity, confidence, and equal opportunities, we can empower more girls to thrive in the tech sector and help build a more diverse and innovative future.
#GenderEquality #WomenInTech #ProgrammingEducation #STEMForGirls #CodeWithConfidence #TechDiversity #FemaleProgrammers #DigitalInclusion #EmpowerGirls #CodingFuture
By: @DataScienceQ🚀
#How can I implement Principal Component Analysis (PCA) for dimensionality reduction using scikit-learn? Provide a Python example, explain the concept of variance maximization, and discuss how to choose the number of principal components.
Answer:
PCA reduces the dimensionality of data while preserving as much variance as possible. It transforms features into new uncorrelated variables (principal components) ordered by explained variance.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
# Load dataset
data = load_iris()
X = data.data
y = data.target
feature_names = data.feature_names
# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Apply PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# Print explained variance ratio
print("Explained Variance Ratio:", pca.explained_variance_ratio_)
print("Total Explained Variance:", sum(pca.explained_variance_ratio_))
# Plot results
plt.figure(figsize=(8, 6))
colors = ['red', 'green', 'blue']
for i in range(3):
plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], c=colors[i], label=data.target_names[i])
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA of Iris Dataset')
plt.legend()
plt.grid(True)
plt.show()
# Determine optimal number of components
pca_full = PCA()
pca_full.fit(X_scaled)
cumulative_variance = np.cumsum(pca_full.explained_variance_ratio_)
plt.figure(figsize=(8, 6))
plt.plot(range(1, len(cumulative_variance) + 1), cumulative_variance, marker='o')
plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance Threshold')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Choosing Number of Components')
plt.legend()
plt.grid(True)
plt.show()
Explanation:
- Standardization: Essential because PCA is sensitive to scale.
- PCA transformation: Finds directions (components) that maximize variance in the data.
- Components: The first component captures the most variance, the second the next highest, etc.
Choosing Number of Components:
Use the "elbow method" or set a cumulative-variance threshold (e.g., 95%). In the example, n_components=2 retains about 96% of the variance, an effective reduction from 4D to 2D.
Time Complexity: O(nm² + m³), where n is the number of samples and m the number of features.
Use Case: #PCA is ideal for visualization, noise reduction, and improving model performance on high-dimensional data.
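As an alternative to reading the cumulative-variance plot, scikit-learn's PCA also accepts a float for n_components and keeps just enough components to reach that fraction of variance. A small sketch reusing X_scaled from the code above:
pca_95 = PCA(n_components=0.95)          # keep enough components for 95% of the variance
X_reduced = pca_95.fit_transform(X_scaled)
print("Components kept:", pca_95.n_components_)
print("Variance explained:", sum(pca_95.explained_variance_ratio_))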
By: @DataScienceQ
#How can I implement the K-Nearest Neighbors (KNN) algorithm for classification using scikit-learn? Provide a Python example, explain how distance metrics affect predictions, and discuss the impact of choosing different values of k.
Answer:
KNN is a non-parametric algorithm that classifies data points based on the majority class among their k nearest neighbors in feature space.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
# Load dataset
data = datasets.load_iris()
X = data.data
y = data.target
feature_names = data.feature_names
target_names = data.target_names
# Split and scale data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Train KNN model with k=5
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn.fit(X_train_scaled, y_train)
# Predict and evaluate
y_pred = knn.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()
# Visualize decision boundaries (for first two features only)
plt.figure(figsize=(8, 6))
X_plot = X[:, :2] # Use only first two features for visualization
X_plot_scaled = scaler.fit_transform(X_plot)
knn_visual = KNeighborsClassifier(n_neighbors=5)
knn_visual.fit(X_plot_scaled, y)
h = 0.02
x_min, x_max = X_plot_scaled[:, 0].min() - 1, X_plot_scaled[:, 0].max() + 1
y_min, y_max = X_plot_scaled[:, 1].min() - 1, X_plot_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = knn_visual.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.Paired)
for i, color in enumerate(['red', 'green', 'blue']):
idx = np.where(y == i)
plt.scatter(X_plot_scaled[idx, 0], X_plot_scaled[idx, 1], c=color, label=target_names[i], edgecolors='k')
plt.xlabel(feature_names[0])
plt.ylabel(feature_names[1])
plt.title('KNN Decision Boundaries (First Two Features)')
plt.legend()
plt.show()
Explanation:
- Distance Metrics: Common choices include Euclidean, Manhattan, and Minkowski. Euclidean is default and suitable for continuous variables.
- Choice of k:
- Small k (e.g., 1 or 3): Sensitive to noise, may overfit.
- Large k: Smoother decision boundaries, but may underfit.
- Optimal k is found via cross-validation.
- Standardization: Crucial because KNN uses distance; unscaled features can dominate results.
Time Complexity: O(nm) per prediction, where n is training samples and m is features.
Space Complexity: O(nm) to store training data.
Use Case: KNN is simple, effective for small-to-medium datasets, and works well when patterns are localized.
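Since the optimal k is found via cross-validation, here is a minimal hedged sketch that scans a few candidate values of k on the scaled training data from the example above:
from sklearn.model_selection import cross_val_score

for k in [1, 3, 5, 7, 9, 11]:
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train_scaled, y_train, cv=5)
    print(f"k={k}: mean CV accuracy = {scores.mean():.3f}")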
#MachineLearning #KNN #Classification #ScikitLearn #DataScience #PythonProgramming #AlgorithmExplained #DimensionalityReduction #SupervisedLearning
By: @DataScienceQ 🚀
#How can I use scikit-learn to build a machine learning pipeline for classification? Provide a Python example, explain the steps involved in preprocessing, model training, and evaluation, and demonstrate how to use cross-validation.
Answer:
Scikit-learn is a powerful Python library for machine learning that provides simple and efficient tools for data mining and data analysis. It supports various algorithms, preprocessing techniques, and evaluation metrics.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
# Load dataset
data = datasets.load_iris()
X = data.data
y = data.target
feature_names = data.feature_names
target_names = data.target_names
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Create a pipeline with preprocessing and model
pipeline = Pipeline([
('scaler', StandardScaler()),
('classifier', SVC(kernel='rbf', random_state=42))
])
# Train the model
pipeline.fit(X_train, y_train)
# Make predictions
y_pred = pipeline.predict(X_test)
# Evaluate the model
accuracy = pipeline.score(X_test, y_test)
print(f"Accuracy: {accuracy:.2f}")
# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()
# Cross-validation
cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV Score: {cv_scores.mean():.2f} ± {cv_scores.std():.2f}")
# Hyperparameter tuning using GridSearchCV
param_grid = {
'classifier__C': [0.1, 1, 10],
'classifier__gamma': ['scale', 'auto', 0.1, 1]
}
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)
# Final model with best parameters
best_model = grid_search.best_estimator_
final_predictions = best_model.predict(X_test)
final_accuracy = accuracy_score(y_test, final_predictions)
print(f"Final Accuracy with tuned model: {final_accuracy:.2f}")
Explanation:
- Pipeline: Combines preprocessing (StandardScaler) and model (SVC) into one unit for clean workflow and avoiding data leakage.
- StandardScaler: Normalizes features to have zero mean and unit variance.
- SVC: Support Vector Classifier for classification; RBF kernel handles non-linear data.
- Cross-validation: Evaluates model performance on multiple folds to reduce overfitting.
- GridSearchCV: Automates hyperparameter tuning by testing combinations of parameters.
Key Features of scikit-learn:
- Consistent API across models and utilities.
- Built-in support for preprocessing, feature selection, model evaluation, and ensemble methods.
- Extensive documentation and community support.
Use Case: Ideal for beginners and professionals alike to quickly prototype, evaluate, and optimize machine learning models.
#MachineLearning #ScikitLearn #Python #DataScience #MLPipeline #Classification #CrossValidation #HyperparameterTuning #SVM #GridSearchCV #DataPreprocessing
By: @DataScienceQ 🚀
#How can I use SciPy for scientific computing tasks such as numerical integration, optimization, and signal processing? Provide a Python example that demonstrates solving a differential equation, optimizing a function, and filtering a noisy signal.
Answer:
SciPy is a powerful Python library built on NumPy that provides modules for advanced scientific computing, including optimization, integration, interpolation, and signal processing.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp
from scipy.optimize import minimize
from scipy.signal import butter, filtfilt
from scipy.interpolate import interp1d
# 1. Numerical Integration: Solve a system of ODEs (e.g., predator-prey model)
def predator_prey(t, state):
    prey, predator = state  # unpack the state vector
    dxdt = 0.5 * prey - 0.02 * prey * predator
    dydt = -0.4 * predator + 0.01 * prey * predator
    return [dxdt, dydt]
# Initial conditions: [prey, predator]
initial_conditions = [40, 9]
t_span = [0, 100]
solution = solve_ivp(predator_prey, t_span, initial_conditions, t_eval=np.linspace(0, 100, 1000))
plt.figure(figsize=(10, 6))
plt.plot(solution.t, solution.y[0], label='Prey')
plt.plot(solution.t, solution.y[1], label='Predator')
plt.xlabel('Time')
plt.ylabel('Population')
plt.title('Predator-Prey Model Solution')
plt.legend()
plt.grid(True)
plt.show()
# 2. Optimization: Minimize a function
def objective_function(x):
return x[0]**2 + x[1]**2 + 10 * np.sin(x[0]) * np.sin(x[1])
# Initial guess
x0 = [1, 1]
result = minimize(objective_function, x0, method='BFGS')
print("Optimization Result:")
print(f"Minimum value: {result.fun}")
print(f"Optimal point: {result.x}")
# Plot the function and minimum
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2 + 10 * np.sin(X) * np.sin(Y)
plt.figure(figsize=(8, 6))
contour = plt.contour(X, Y, Z, levels=50, cmap='viridis')
plt.colorbar(contour)
plt.scatter(result.x[0], result.x[1], color='red', s=100, label='Minimum')
plt.title('Function Minimization with SciPy')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()
# 3. Signal Processing: Filter a noisy sine wave
t = np.linspace(0, 10, 1000)
signal = np.sin(2 * np.pi * t) + 0.5 * np.random.randn(len(t)) # Noisy signal
# Design Butterworth filter
b, a = butter(4, 0.1, btype='low') # Low-pass filter
filtered_signal = filtfilt(b, a, signal)
plt.figure(figsize=(10, 6))
plt.plot(t, signal, label='Noisy Signal', alpha=0.7)
plt.plot(t, filtered_signal, label='Filtered Signal', linewidth=2)
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.title('Low-Pass Filtering with SciPy')
plt.legend()
plt.grid(True)
plt.show()
# 4. Interpolation: Fit a smooth curve to scattered data
x_data = np.array([0, 1, 2, 3, 4])
y_data = np.array([0, 1, 0, 1, 0])
f = interp1d(x_data, y_data, kind='cubic')
x_new = np.linspace(0, 4, 100)
y_new = f(x_new)
plt.figure(figsize=(8, 6))
plt.scatter(x_data, y_data, color='red', label='Data Points')
plt.plot(x_new, y_new, label='Interpolated Curve', linewidth=2)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Cubic Interpolation with SciPy')
plt.legend()
plt.grid(True)
plt.show()
Explanation:
- solve_ivp: Solves ordinary differential equations numerically using adaptive step size.
- minimize: Finds the minimum of a scalar function using algorithms like BFGS or Nelder-Mead.
- butter & filtfilt: Designs and applies a Butterworth filter to remove noise from signals.
- interp1d: Performs one-dimensional interpolation to create smooth curves from discrete data.
Key Features of SciPy:
- Built on NumPy for efficient array operations.
- Modular structure: separate submodules for different scientific tasks.
- High-performance functions optimized for speed and accuracy.
Use Case: Ideal for engineers, scientists, and data analysts who need robust tools for mathematical modeling, data analysis, and simulation.
1. What is the output of the following code?
x = [1, 2, 3]
y = x
y.append(4)
print(x)
2. Which of the following data types is immutable in Python?
A) List
B) Dictionary
C) Set
D) Tuple
3. Write a Python program to reverse a string without using built-in functions.
4. What will be printed by this code?
def func(a, b=[]):
b.append(a)
return b
print(func(1))
print(func(2))
5. Explain the difference between the == and is operators in Python.
6. How do you handle exceptions in Python? Provide an example.
7. What is the output of:
print(2 ** 3 ** 2)
8. Which keyword is used to define a function in Python?
A) def
B) function
C) func
D) define
9. Write a program to find the factorial of a number using recursion.
10. What does the *args parameter do in a function?
11. What will be the output of:
list1 = [1, 2, 3]
list2 = list1.copy()
list2[0] = 10
print(list1)
12. Explain the concept of list comprehension with an example.
13. What is the purpose of the __init__ method in a Python class?
14. Write a program to check if a given string is a palindrome.
15. What is the output of:
a = [1, 2, 3]
b = a[:]
b[0] = 10
print(a)
16. Describe how Python manages memory (garbage collection).
17. What will be printed by:
x = "hello"
y = "world"
print(x + y)
18. Write a Python program to generate the first n Fibonacci numbers.
19. What is the difference between range() and xrange() in Python 2?
20. What is the use of the lambda function in Python? Give an example.
#PythonQuiz #CodingTest #ProgrammingExam #MultipleChoice #CodeOutput #PythonBasics #InterviewPrep #CodingChallenge #BeginnerPython #TechAssessment #PythonQuestions #SkillCheck #ProgrammingSkills #CodePractice #PythonLearning #MCQ #ShortAnswer #TechnicalTest #PythonSyntax #Algorithm #DataStructures #PythonProgramming
By: @DataScienceQ 🚀