Forwarded from Python Data Science Jobs & Interviews
In Python, NumPy is the cornerstone of scientific computing, offering high-performance multidimensional arrays and tools for working with them—critical for data science interviews and real-world applications! 📊
By: @DataScienceQ 🚀
#Python #NumPy #DataScience #CodingInterview #MachineLearning #ScientificComputing #DataAnalysis #Programming #TechJobs #DeveloperTips
import numpy as np
# Array Creation - The foundation of NumPy
arr = np.array([1, 2, 3])
zeros = np.zeros((2, 3)) # 2x3 matrix of zeros
ones = np.ones((2, 2), dtype=int) # Integer matrix
arange = np.arange(0, 10, 2) # [0 2 4 6 8]
linspace = np.linspace(0, 1, 5) # [0. 0.25 0.5 0.75 1. ]
print(linspace)
# Array Attributes - Master your data's structure
matrix = np.array([[1, 2, 3], [4, 5, 6]])
print(matrix.shape) # Output: (2, 3)
print(matrix.ndim) # Output: 2
print(matrix.dtype) # Output: int64
print(matrix.size) # Output: 6
# Indexing & Slicing - Precision data access
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(data[1, 2]) # Output: 6 (row 1, col 2)
print(data[0:2, 1:3]) # Output: [[2 3], [5 6]]
print(data[:, -1]) # Output: [3 6 9] (last column)
# Reshaping Arrays - Transform dimensions effortlessly
flat = np.arange(6)
reshaped = flat.reshape(2, 3)
raveled = reshaped.ravel()
print(reshaped)
# Output: [[0 1 2], [3 4 5]]
print(raveled) # Output: [0 1 2 3 4 5]
# Stacking Arrays - Combine datasets vertically/horizontally
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.vstack((a, b))) # Vertical stack
# Output: [[1 2 3], [4 5 6]]
print(np.hstack((a, b))) # Horizontal stack
# Output: [1 2 3 4 5 6]
# Mathematical Operations - Vectorized calculations
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
print(x + y) # Output: [5 7 9]
print(x * 2) # Output: [2 4 6]
print(np.dot(x, y)) # Output: 32 (1*4 + 2*5 + 3*6)
# Broadcasting Magic - Operate on mismatched shapes
matrix = np.array([[1, 2, 3], [4, 5, 6]])
scalar = 10
print(matrix + scalar)
# Output: [[11 12 13], [14 15 16]]
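# Added sketch: broadcasting also works array-to-array, not only with scalars.
# A 1-D array whose length matches the last dimension is applied to every row of `matrix`.
row = np.array([10, 20, 30])
print(matrix + row)
# Output: [[11 22 33], [14 25 36]]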
# Aggregation Functions - Statistical power in one line
values = np.array([1, 5, 3, 9, 7])
print(np.sum(values)) # Output: 25
print(np.mean(values)) # Output: 5.0
print(np.max(values)) # Output: 9
print(np.std(values)) # Output: 2.8284271247461903
# Boolean Masking - Filter data like a pro
temperatures = np.array([18, 25, 12, 30, 22])
hot_days = temperatures > 24
print(temperatures[hot_days]) # Output: [25 30]
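# Added sketch: masks can be combined with & (and) / | (or); wrap each condition in parentheses.
mild_days = temperatures[(temperatures > 15) & (temperatures < 26)]
print(mild_days) # Output: [18 25 22]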
# Random Number Generation - Simulate real-world data
print(np.random.rand(2, 2)) # Uniform distribution
print(np.random.randn(3)) # Normal distribution
print(np.random.randint(0, 10, (2, 3))) # Random integers
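# Added aside: recent NumPy versions recommend the Generator API for new code.
rng = np.random.default_rng(seed=42)
print(rng.integers(0, 10, size=(2, 3))) # Random integers, reproducible for a fixed seed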
# Linear Algebra Essentials - Solve equations like a physicist
A = np.array([[3, 1], [1, 2]])
b = np.array([9, 8])
x = np.linalg.solve(A, b)
print(x) # Output: [2. 3.] (Solution to 3x+y=9 and x+2y=8)
# Matrix inverse and determinant
print(np.linalg.inv(A)) # Output: [[ 0.4 -0.2], [-0.2 0.6]]
print(np.linalg.det(A)) # Output: 5.0
# File Operations - Save/load your computational work
data = np.array([[1, 2], [3, 4]])
np.save('array.npy', data)
loaded = np.load('array.npy')
print(np.array_equal(data, loaded)) # Output: True
# Interview Power Move: Vectorization vs Loops
# Vectorized NumPy operations are typically an order of magnitude faster than native Python loops
def square_sum(n):
    arr = np.arange(n)
    return np.sum(arr ** 2)
print(square_sum(5)) # Output: 30 (0²+1²+2²+3²+4²)
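# A rough timing sketch (added, illustrative only; the exact speedup depends on n and hardware):
import timeit
loop_time = timeit.timeit(lambda: sum(i ** 2 for i in range(100_000)), number=100)
vec_time = timeit.timeit(lambda: np.sum(np.arange(100_000) ** 2), number=100)
print(f"Python loop: {loop_time:.3f}s, NumPy vectorized: {vec_time:.3f}s")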
# Pro Tip: Memory-efficient data processing
# Process a large on-disk array (~400 MB here) without loading it into memory
# (assumes 'large_data.bin' already exists with a matching dtype and shape)
large_array = np.memmap('large_data.bin', dtype='float32', mode='r', shape=(1000000, 100))
print(large_array[0:5, 0:3]) # Process small slice
Top 100 Data Analysis Commands & Functions
#DataAnalysis #Pandas #DataLoading #Inspection
Part 1: Pandas - Data Loading & Inspection
#1.
pd.read_csv(): Reads a comma-separated values (csv) file into a Pandas DataFrame.
import pandas as pd
from io import StringIO
csv_data = "col1,col2,col3\n1,a,True\n2,b,False"
df = pd.read_csv(StringIO(csv_data))
print(df)
col1 col2 col3
0 1 a True
1 2 b False
#2.
df.head(): Returns the first n rows of the DataFrame (default is 5).
import pandas as pd
df = pd.DataFrame({'A': range(10), 'B': list('abcdefghij')})
print(df.head(3))
A B
0 0 a
1 1 b
2 2 c
#3.
df.tail(): Returns the last n rows of the DataFrame (default is 5).
import pandas as pd
df = pd.DataFrame({'A': range(10), 'B': list('abcdefghij')})
print(df.tail(3))
A B
7 7 h
8 8 i
9 9 j
#4.
df.info(): Prints a concise summary of a DataFrame, including data types and non-null values.
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, 2, np.nan], 'B': ['x', 'y', 'z']})
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 A 2 non-null float64
1 B 3 non-null object
dtypes: float64(1), object(1)
memory usage: 176.0+ bytes
#5.
df.describe(): Generates descriptive statistics for numerical columns.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3, 4, 5]})
print(df.describe())
A
count 5.000000
mean 3.000000
std 1.581139
min 1.000000
25% 2.000000
50% 3.000000
75% 4.000000
max 5.000000
#6.
df.shape: Returns a tuple representing the dimensionality (rows, columns) of the DataFrame.
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]})
print(df.shape)
(2, 3)
#7.
df.columns: Returns the column labels of the DataFrame.
import pandas as pd
df = pd.DataFrame({'Name': ['Alice'], 'Age': [30]})
print(df.columns)
Index(['Name', 'Age'], dtype='object')
#8.
df.dtypes: Returns the data types of each column.
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [1.1, 2.2], 'C': ['x', 'y']})
print(df.dtypes)
A int64
B float64
C object
dtype: object
#9.
df['col'].value_counts(): Returns a Series containing counts of unique values in a column.
import pandas as pd
df = pd.DataFrame({'Fruit': ['Apple', 'Banana', 'Apple', 'Orange', 'Banana', 'Apple']})
print(df['Fruit'].value_counts())
Apple 3
Banana 2
Orange 1
Name: Fruit, dtype: int64
#10.
df['col'].unique(): Returns an array of the unique values in a column.
import pandas as pd
df = pd.DataFrame({'Fruit': ['Apple', 'Banana', 'Apple', 'Orange']})
print(df['Fruit'].unique())
['Apple' 'Banana' 'Orange']
#11.
df['col'].nunique(): Returns the number of unique values in a column.
import pandas as pd
df = pd.DataFrame({'Fruit': ['Apple', 'Banana', 'Apple', 'Orange']})
print(df['Fruit'].nunique())
3
#12.
df.isnull(): Returns a DataFrame of boolean values indicating missing values.
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, np.nan], 'B': [np.nan, 'x']})
print(df.isnull())
A B
0 False True
1 True False
#13.
df.isnull().sum(): Returns the number of missing values in each column.
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, np.nan, 3, np.nan], 'B': [5, 6, 7, 8]})
print(df.isnull().sum())
A 2
B 0
dtype: int64
#14.
df.to_csv(): Writes the DataFrame to a comma-separated values (csv) file.
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
csv_output = df.to_csv(index=False)
print(csv_output)
A,B
1,3
2,4
#15.
df.copy(): Creates a deep copy of a DataFrame.
import pandas as pd
df1 = pd.DataFrame({'A': [1]})
df2 = df1.copy()
df2.loc[0, 'A'] = 99
print(f"Original df1:\n{df1}")
print(f"Copied df2:\n{df2}")
Original df1:
A
0 1
Copied df2:
A
0 99
---
#DataAnalysis #Pandas #Selection #Indexing
Part 2: Pandas - Data Selection & Indexing
#16.
df['col']: Selects a single column as a Series.
import pandas as pd
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})
print(df['Name'])
0 Alice
1 Bob
Name: Name, dtype: object
#17.
df[['col1', 'col2']]: Selects multiple columns as a new DataFrame.
import pandas as pd
df = pd.DataFrame({'Name': ['Alice'], 'Age': [30], 'City': ['New York']})
print(df[['Name', 'City']])
Name City
0 Alice New York
#18.
df.loc[]: Accesses a group of rows and columns by label(s) or a boolean array.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]}, index=['x', 'y', 'z'])
print(df.loc['y'])
A 2
Name: y, dtype: int64
#19.
df.iloc[]: Accesses a group of rows and columns by integer position(s).
import pandas as pd
df = pd.DataFrame({'A': [10, 20, 30]})
print(df.iloc[1])
A 20
Name: 1, dtype: int64
#20.
df[df['col'] > value]: Selects rows based on a boolean condition (boolean indexing).
import pandas as pd
df = pd.DataFrame({'Age': [22, 35, 18, 40]})
print(df[df['Age'] > 30])
Age
1 35
3 40
#21.
df.set_index(): Sets the DataFrame index using existing columns.
import pandas as pd
df = pd.DataFrame({'Country': ['USA', 'UK'], 'Code': [1, 44]})
df_indexed = df.set_index('Country')
print(df_indexed)
Code
Country
USA 1
UK 44
#22.
df.reset_index(): Resets the index of the DataFrame and uses the default integer index.
import pandas as pd
df = pd.DataFrame({'Code': [1, 44]}, index=['USA', 'UK'])
df_reset = df.reset_index()
print(df_reset)
index Code
0 USA 1
1 UK 44
#23.
df.at[]: Accesses a single value by row/column label pair. Faster than .loc.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]}, index=['x', 'y', 'z'])
print(df.at['y', 'A'])
2
#24.
df.iat[]: Accesses a single value by row/column integer position. Faster than .iloc.
import pandas as pd
df = pd.DataFrame({'A': [10, 20, 30]})
print(df.iat[1, 0])
20
#25.
df.sample(): Returns a random sample of items from an axis of object.
import pandas as pd
df = pd.DataFrame({'A': range(10)})
print(df.sample(n=3))
A
8 8
2 2
5 5
(Note: Output rows will be random)
---
#DataAnalysis #Pandas #DataCleaning #Manipulation
Part 3: Pandas - Data Cleaning & Manipulation
#26.
df.dropna(): Removes missing values.
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, np.nan, 3]})
print(df.dropna())
A
0 1.0
2 3.0
#27.
df.fillna(): Fills missing (NA/NaN) values using a specified method.
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, np.nan, 3]})
print(df.fillna(0))
A
0 1.0
1 0.0
2 3.0
#28.
df.astype(): Casts a pandas object to a specified dtype.
import pandas as pd
df = pd.DataFrame({'A': [1.1, 2.7, 3.5]})
df['A'] = df['A'].astype(int)
print(df)
A
0 1
1 2
2 3
#29.
df.rename(): Alters axes labels.
import pandas as pd
df = pd.DataFrame({'a': [1], 'b': [2]})
df_renamed = df.rename(columns={'a': 'A', 'b': 'B'})
print(df_renamed)
A B
0 1 2
#30.
df.drop(): Drops specified labels from rows or columns.
import pandas as pd
df = pd.DataFrame({'A': [1], 'B': [2], 'C': [3]})
df_dropped = df.drop(columns=['B'])
print(df_dropped)
A C
0 1 3
#31.
pd.to_datetime(): Converts argument to datetime.
import pandas as pd
s = pd.Series(['2023-01-01', '2023-01-02'])
dt_s = pd.to_datetime(s)
print(dt_s)
0 2023-01-01
1 2023-01-02
dtype: datetime64[ns]
#32.
df.apply(): Applies a function along an axis of the DataFrame.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]})
df['B'] = df['A'].apply(lambda x: x * 2)
print(df)
A B
0 1 2
1 2 4
2 3 6
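(Added aside, not part of the original list: apply also works row-wise with axis=1, e.g. to combine several columns per row.)
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3], 'B': [10, 20, 30]})
df['total'] = df.apply(lambda row: row['A'] + row['B'], axis=1)
print(df['total'].tolist())
[11, 22, 33]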
#33.
df['col'].map(): Maps values of a Series according to an input mapping or function.
import pandas as pd
df = pd.DataFrame({'Gender': ['M', 'F', 'M']})
df['Gender_Full'] = df['Gender'].map({'M': 'Male', 'F': 'Female'})
print(df)
Gender Gender_Full
0 M Male
1 F Female
2 M Male
#34.
df.replace(): Replaces values given in to_replace with value.
import pandas as pd
df = pd.DataFrame({'Score': [10, -99, 15, -99]})
df_replaced = df.replace(-99, 0)
print(df_replaced)
Score
0 10
1 0
2 15
3 0
#35.
df.duplicated(): Returns a boolean Series denoting duplicate rows.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1], 'B': ['a', 'b', 'a']})
print(df.duplicated())
0 False
1 False
2 True
dtype: bool
#36.
df.drop_duplicates(): Returns a DataFrame with duplicate rows removed.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1], 'B': ['a', 'b', 'a']})
print(df.drop_duplicates())
A B
0 1 a
1 2 b
#37.
df.sort_values(): Sorts by the values along either axis.
import pandas as pd
df = pd.DataFrame({'Age': [25, 22, 30]})
print(df.sort_values(by='Age'))
Age
1 22
0 25
2 30
#38.
df.sort_index(): Sorts object by labels (along an axis).
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]}, index=[10, 5, 8])
print(df.sort_index())
A
5 2
8 3
10 1
#39.
pd.cut(): Bins values into discrete intervals.
import pandas as pd
ages = pd.Series([22, 35, 58, 8, 42])
age_bins = pd.cut(ages, bins=[0, 18, 35, 60], labels=['Child', 'Adult', 'Senior'])
print(age_bins)
0 Adult
1 Adult
2 Senior
3 Child
4 Senior
dtype: category
Categories (3, object): ['Child' < 'Adult' < 'Senior']
#40.
pd.qcut(): Quantile-based discretization function (bins into equal-sized groups).
import pandas as pd
data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
quartiles = pd.qcut(data, 4, labels=False)
print(quartiles)
0 0
1 0
2 0
3 1
4 1
5 2
6 2
7 3
8 3
9 3
dtype: int64
#41.
s.str.contains(): Tests if a pattern or regex is contained within a string of a Series.
import pandas as pd
s = pd.Series(['apple', 'banana', 'apricot'])
print(s[s.str.contains('ap')])
0 apple
2 apricot
dtype: object
#42.
s.str.split(): Splits strings around a given separator/delimiter.
import pandas as pd
s = pd.Series(['a_b', 'c_d'])
print(s.str.split('_', expand=True))
0 1
0 a b
1 c d
#43.
s.str.lower(): Converts strings in the Series to lowercase.
import pandas as pd
s = pd.Series(['HELLO', 'World'])
print(s.str.lower())
0 hello
1 world
dtype: object
#44.
s.str.strip(): Removes leading and trailing whitespace.
import pandas as pd
s = pd.Series([' hello ', ' world '])
print(s.str.strip())
0 hello
1 world
dtype: object
#45.
s.dt.year: Extracts the year from a datetime Series.
import pandas as pd
s = pd.to_datetime(pd.Series(['2023-01-01', '2024-05-10']))
print(s.dt.year)
0 2023
1 2024
dtype: int64
---
#DataAnalysis #Pandas #Grouping #Aggregation
Part 4: Pandas - Grouping & Aggregation
#46.
df.groupby(): Groups a DataFrame using a mapper or by a Series of columns.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
grouped = df.groupby('Team')
print(grouped)
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x...>
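(Added aside: the GroupBy object itself is lazy; a quick way to inspect a single group is get_group, before aggregating as the next entries show.)
print(grouped.get_group('A'))
  Team  Points
0    A      10
2    A      12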
#47.
groupby.agg(): Aggregates using one or more operations over the specified axis.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
agg_df = df.groupby('Team').agg(['mean', 'sum'])
print(agg_df)
Points
mean sum
Team
A 11.0 22
B 7.0 14
#48.
groupby.size(): Computes group sizes.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B', 'A']})
print(df.groupby('Team').size())
Team
A 3
B 2
dtype: int64
#49.
groupby.count(): Computes the count of non-NA cells for each group.
import pandas as pd
import numpy as np
df = pd.DataFrame({'Team': ['A', 'B', 'A'], 'Score': [1, np.nan, 3]})
print(df.groupby('Team').count())
Score
Team
A 2
B 0
#50.
groupby.mean(): Computes the mean of group values.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').mean())
Points
Team
A 11.0
B 7.0
#51.
groupby.sum(): Computes the sum of group values.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').sum())
Points
Team
A 22
B 14
#52.
groupby.min(): Computes the minimum of group values.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').min())
Points
Team
A 10
B 6
#53.
groupby.max(): Computes the maximum of group values.
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').max())
Points
Team
A 12
B 8
#54.
df.pivot_table(): Creates a spreadsheet-style pivot table as a DataFrame.
import pandas as pd
df = pd.DataFrame({'A': ['foo', 'foo', 'bar'], 'B': ['one', 'two', 'one'], 'C': [1, 2, 3]})
pivot = df.pivot_table(values='C', index='A', columns='B')
print(pivot)
B one two
A
bar 3.0 NaN
foo 1.0 2.0
#55.
pd.crosstab(): Computes a cross-tabulation of two (or more) factors.
import pandas as pd
df = pd.DataFrame({'A': ['foo', 'foo', 'bar'], 'B': ['one', 'two', 'one']})
crosstab = pd.crosstab(df.A, df.B)
print(crosstab)
B one two
A
bar 1 0
foo 1 1
---
#DataAnalysis #Pandas #Merging #Joining
Part 5: Pandas - Merging & Concatenating
#56.
pd.merge(): Merges DataFrame or named Series objects with a database-style join.
import pandas as pd
df1 = pd.DataFrame({'key': ['A', 'B'], 'val1': [1, 2]})
df2 = pd.DataFrame({'key': ['A', 'B'], 'val2': [3, 4]})
merged = pd.merge(df1, df2, on='key')
print(merged)
key val1 val2
0 A 1 3
1 B 2 4
#57.
pd.concat(): Concatenates pandas objects along a particular axis.
import pandas as pd
df1 = pd.DataFrame({'A': [1, 2]})
df2 = pd.DataFrame({'A': [3, 4]})
concatenated = pd.concat([df1, df2])
print(concatenated)
A
0 1
1 2
0 3
1 4
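(Added note: the repeated 0/1 index above is expected; pass ignore_index=True to renumber the concatenated result.)
print(pd.concat([df1, df2], ignore_index=True))
   A
0  1
1  2
2  3
3  4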
#58.
df.join(): Joins columns with other DataFrame(s) on index or on a key column.
import pandas as pd
df1 = pd.DataFrame({'val1': [1, 2]}, index=['A', 'B'])
df2 = pd.DataFrame({'val2': [3, 4]}, index=['A', 'B'])
joined = df1.join(df2)
print(joined)
val1 val2
A 1 3
B 2 4
#59.
pd.get_dummies(): Converts categorical variable into dummy/indicator variables (one-hot encoding).
import pandas as pd
s = pd.Series(list('abca'))
dummies = pd.get_dummies(s)
print(dummies)
a b c
0 1 0 0
1 0 1 0
2 0 0 1
3 1 0 0
#60.
df.nlargest(): Returns the first n rows ordered by columns in descending order.
import pandas as pd
df = pd.DataFrame({'population': [100, 500, 200, 800]})
print(df.nlargest(2, 'population'))
population
3 800
1 500
---
#DataAnalysis #NumPy #Arrays
Part 6: NumPy - Array Creation & Manipulation
#61.
np.array(): Creates a NumPy ndarray.
import numpy as np
arr = np.array([1, 2, 3])
print(arr)
[1 2 3]
#62.
np.arange(): Returns an array with evenly spaced values within a given interval.
import numpy as np
arr = np.arange(0, 5)
print(arr)
[0 1 2 3 4]
#63.
np.linspace(): Returns an array with evenly spaced numbers over a specified interval.
import numpy as np
arr = np.linspace(0, 10, 5)
print(arr)
[ 0. 2.5 5. 7.5 10. ]
#64.
np.zeros(): Returns a new array of a given shape and type, filled with zeros.
import numpy as np
arr = np.zeros((2, 3))
print(arr)
[[0. 0. 0.]
[0. 0. 0.]]
#65.
np.ones(): Returns a new array of a given shape and type, filled with ones.
import numpy as np
arr = np.ones((2, 3))
print(arr)
[[1. 1. 1.]
[1. 1. 1.]]
#66.
np.random.rand(): Creates an array of the given shape and populates it with random samples from a uniform distribution over [0, 1).
import numpy as np
arr = np.random.rand(2, 2)
print(arr)
[[0.13949386 0.2921446 ]
[0.52273283 0.77122228]]
(Note: Output values will be random)
#67.
arr.reshape(): Gives a new shape to an array without changing its data.
import numpy as np
arr = np.arange(6)
reshaped_arr = arr.reshape((2, 3))
print(reshaped_arr)
[[0 1 2]
[3 4 5]]
#68.
np.concatenate(): Joins a sequence of arrays along an existing axis.
import numpy as np
a = np.array([[1, 2]])
b = np.array([[3, 4]])
print(np.concatenate((a, b), axis=0))
[[1 2]
[3 4]]
#69.
np.vstack(): Stacks arrays in sequence vertically (row wise).
import numpy as np
a = np.array([1, 2])
b = np.array([3, 4])
print(np.vstack((a, b)))
[[1 2]
[3 4]]
#70.
np.hstack(): Stacks arrays in sequence horizontally (column wise).
import numpy as np
a = np.array([1, 2])
b = np.array([3, 4])
print(np.hstack((a, b)))
[1 2 3 4]
---
#DataAnalysis #NumPy #Math #Statistics
Part 7: NumPy - Mathematical & Statistical Functions
#71.
np.mean(): Computes the arithmetic mean along the specified axis.
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
print(np.mean(arr))
3.0
#72.
np.median(): Computes the median along the specified axis.
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
print(np.median(arr))
3.0
#73.
np.std(): Computes the standard deviation along the specified axis.
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
print(np.std(arr))
1.4142135623730951
#74.
np.sum(): Sums array elements over a given axis.
import numpy as np
arr = np.array([[1, 2], [3, 4]])
print(np.sum(arr))
10
#75.
np.min(): Returns the minimum of an array or minimum along an axis.
import numpy as np
arr = np.array([5, 2, 8, 1])
print(np.min(arr))
1
#76.
np.max(): Returns the maximum of an array or maximum along an axis.
import numpy as np
arr = np.array([5, 2, 8, 1])
print(np.max(arr))
8
#77.
np.sqrt(): Returns the non-negative square-root of an array, element-wise.
import numpy as np
arr = np.array([4, 9, 16])
print(np.sqrt(arr))
[2. 3. 4.]
#78.
np.log(): Calculates the natural logarithm, element-wise.
import numpy as np
arr = np.array([1, np.e, np.e**2])
print(np.log(arr))
[0. 1. 2.]
#79.
np.dot(): Calculates the dot product of two arrays.
import numpy as np
a = np.array([1, 2])
b = np.array([3, 4])
print(np.dot(a, b))
11
#80.
np.where(): Returns elements chosen from x or y depending on a condition.
import numpy as np
arr = np.array([10, 5, 20, 15])
print(np.where(arr > 12, 'High', 'Low'))
['Low' 'Low' 'High' 'High']
---
#DataAnalysis #Matplotlib #Seaborn #Visualization
Part 8: Matplotlib & Seaborn - Data Visualization
#81.
plt.plot(): Plots y versus x as lines and/or markers.
import matplotlib.pyplot as plt
plt.plot([1, 2, 3, 4], [1, 4, 9, 16])
# In a real script, you would call plt.show()
print("Output: A figure window opens displaying a line plot.")
Output: A figure window opens displaying a line plot.
#82.
plt.scatter(): Makes a scatter plot of y vs. x with varying marker size and/or color.
import matplotlib.pyplot as plt
plt.scatter([1, 2, 3, 4], [1, 4, 9, 16])
print("Output: A figure window opens displaying a scatter plot.")
Output: A figure window opens displaying a scatter plot.
#83.
plt.hist(): Computes and draws the histogram of x.
import matplotlib.pyplot as plt
import numpy as np
data = np.random.randn(1000)
plt.hist(data, bins=30)
print("Output: A figure window opens displaying a histogram.")
Output: A figure window opens displaying a histogram.
#84.
plt.bar(): Makes a bar plot.
import matplotlib.pyplot as plt
plt.bar(['A', 'B', 'C'], [10, 15, 7])
print("Output: A figure window opens displaying a bar chart.")
Output: A figure window opens displaying a bar chart.
#85.
plt.boxplot(): Makes a box and whisker plot.
import matplotlib.pyplot as plt
import numpy as np
data = [np.random.normal(0, std, 100) for std in range(1, 4)]
plt.boxplot(data)
print("Output: A figure window opens displaying a box plot.")
Output: A figure window opens displaying a box plot.
#86.
sns.heatmap(): Plots rectangular data as a color-encoded matrix.
import seaborn as sns
import numpy as np
data = np.random.rand(10, 12)
sns.heatmap(data)
print("Output: A figure window opens displaying a heatmap.")
Output: A figure window opens displaying a heatmap.
#87.
sns.pairplot(): Plots pairwise relationships in a dataset.
import seaborn as sns
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(100, 4), columns=['A', 'B', 'C', 'D'])
# sns.pairplot(df) # This line would generate the plot
print("Output: A figure grid opens showing scatterplots for each pair of variables.")
Output: A figure grid opens showing scatterplots for each pair of variables.
#88.
sns.countplot(): Shows the counts of observations in each categorical bin using bars.
import seaborn as sns
import pandas as pd
df = pd.DataFrame({'category': ['A', 'B', 'A', 'C', 'A', 'B']})
sns.countplot(x='category', data=df)
print("Output: A figure window opens showing a count plot.")
Output: A figure window opens showing a count plot.
#89.
sns.jointplot(): Draws a plot of two variables with bivariate and univariate graphs.
import seaborn as sns
import pandas as pd
import numpy as np
df = pd.DataFrame({'x': range(50), 'y': np.arange(50) + np.random.randn(50)})
# sns.jointplot(x='x', y='y', data=df) # This line would generate the plot
print("Output: A figure shows a scatter plot with histograms for each axis.")
Output: A figure shows a scatter plot with histograms for each axis.
#90.
plt.show(): Displays all open figures.
import matplotlib.pyplot as plt
plt.plot([1, 2, 3])
# plt.show() # In a script, this is essential to see the plot.
print("Executes the command to render and display the plot.")
Executes the command to render and display the plot.
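(Added tip, as a hedged sketch: in non-interactive scripts you can also write the figure to disk instead of displaying it.)
import matplotlib.pyplot as plt
plt.plot([1, 2, 3])
plt.savefig('figure.png')
print("Output: figure.png is written to the working directory.")
Output: figure.png is written to the working directory.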
---
#DataAnalysis #ScikitLearn #Modeling #Preprocessing
Part 9: Scikit-learn - Modeling & Preprocessing
#91.
train_test_split(): Splits arrays or matrices into random train and test subsets.
from sklearn.model_selection import train_test_split
import numpy as np
X, y = np.arange(10).reshape((5, 2)), range(5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
X_train shape: (3, 2)
X_test shape: (2, 2)
#92.
StandardScaler(): Standardizes features by removing the mean and scaling to unit variance.
from sklearn.preprocessing import StandardScaler
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
scaler = StandardScaler()
print(scaler.fit_transform(data))
[[-1. -1.]
[-1. -1.]
[ 1. 1.]
[ 1. 1.]]
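(Added interview note, a sketch with hypothetical X_train/X_test: fit the scaler on the training split only, then reuse it on the test split to avoid data leakage.)
from sklearn.preprocessing import StandardScaler
X_train, X_test = [[0.0], [1.0], [2.0]], [[1.5]]  # hypothetical tiny splits
scaler = StandardScaler().fit(X_train)  # statistics learned from training data only
print(scaler.transform(X_test))
[[0.61237244]]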
#93.
MinMaxScaler(): Transforms features by scaling each feature to a given range, typically [0, 1].
from sklearn.preprocessing import MinMaxScaler
data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
scaler = MinMaxScaler()
print(scaler.fit_transform(data))
[[0. 0. ]
[0.25 0.25]
[0.5 0.5 ]
[1. 1. ]]
#94.
LabelEncoder(): Encodes target labels with values between 0 and n_classes-1.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
encoded = le.fit_transform(['paris', 'tokyo', 'paris'])
print(encoded)
[0 1 0]
#95.
OneHotEncoder(): Encodes categorical features as a one-hot numeric array.
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
X = [['Male'], ['Female'], ['Female']]
print(enc.fit_transform(X).toarray())
[[0. 1.]
[1. 0.]
[1. 0.]]
#96.
LinearRegression(): Ordinary least squares Linear Regression model.
from sklearn.linear_model import LinearRegression
X = [[0], [1], [2]]
y = [0, 1, 2]
reg = LinearRegression().fit(X, y)
print(f"Coefficient: {reg.coef_[0]}")