Python | Machine Learning | Coding | R
67.8K subscribers
1.27K photos
94 videos
156 files
922 links
Help and ads: @hussein_sheikho

Discover powerful insights with Python, Machine Learning, Coding, and R—your essential toolkit for data-driven solutions, smart alg

List of our channels:
https://t.iss.one/addlist/8_rRW2scgfRhOTc0

https://telega.io/?r=nikapsOH
Download Telegram
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]}, index=['x', 'y', 'z'])
print(df.at['y', 'A'])

2


#24. df.iat[]
Accesses a single value by row/column integer position. Faster than .iloc.

import pandas as pd
df = pd.DataFrame({'A': [10, 20, 30]})
print(df.iat[1, 0])

20


#25. df.sample()
Returns a random sample of items from an axis of object.

import pandas as pd
df = pd.DataFrame({'A': range(10)})
print(df.sample(n=3))

A
8 8
2 2
5 5
(Note: Output rows will be random)

---
#DataAnalysis #Pandas #DataCleaning #Manipulation

Part 3: Pandas - Data Cleaning & Manipulation

#26. df.dropna()
Removes missing values.

import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, np.nan, 3]})
print(df.dropna())

A
0 1.0
2 3.0


#27. df.fillna()
Fills missing (NA/NaN) values using a specified method.

import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, np.nan, 3]})
print(df.fillna(0))

A
0 1.0
1 0.0
2 3.0


#28. df.astype()
Casts a pandas object to a specified dtype.

import pandas as pd
df = pd.DataFrame({'A': [1.1, 2.7, 3.5]})
df['A'] = df['A'].astype(int)
print(df)

A
0 1
1 2
2 3


#29. df.rename()
Alters axes labels.

import pandas as pd
df = pd.DataFrame({'a': [1], 'b': [2]})
df_renamed = df.rename(columns={'a': 'A', 'b': 'B'})
print(df_renamed)

A  B
0 1 2


#30. df.drop()
Drops specified labels from rows or columns.

import pandas as pd
df = pd.DataFrame({'A': [1], 'B': [2], 'C': [3]})
df_dropped = df.drop(columns=['B'])
print(df_dropped)

A  C
0 1 3


#31. pd.to_datetime()
Converts argument to datetime.

import pandas as pd
s = pd.Series(['2023-01-01', '2023-01-02'])
dt_s = pd.to_datetime(s)
print(dt_s)

0   2023-01-01
1 2023-01-02
dtype: datetime64[ns]


#32. df.apply()
Applies a function along an axis of the DataFrame.

import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]})
df['B'] = df['A'].apply(lambda x: x * 2)
print(df)

A  B
0 1 2
1 2 4
2 3 6


#33. df['col'].map()
Maps values of a Series according to an input mapping or function.

import pandas as pd
df = pd.DataFrame({'Gender': ['M', 'F', 'M']})
df['Gender_Full'] = df['Gender'].map({'M': 'Male', 'F': 'Female'})
print(df)

Gender Gender_Full
0 M Male
1 F Female
2 M Male


#34. df.replace()
Replaces values given in to_replace with value.

import pandas as pd
df = pd.DataFrame({'Score': [10, -99, 15, -99]})
df_replaced = df.replace(-99, 0)
print(df_replaced)

Score
0 10
1 0
2 15
3 0


#35. df.duplicated()
Returns a boolean Series denoting duplicate rows.

import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1], 'B': ['a', 'b', 'a']})
print(df.duplicated())

0    False
1 False
2 True
dtype: bool


#36. df.drop_duplicates()
Returns a DataFrame with duplicate rows removed.
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1], 'B': ['a', 'b', 'a']})
print(df.drop_duplicates())

A  B
0 1 a
1 2 b


#37. df.sort_values()
Sorts by the values along either axis.

import pandas as pd
df = pd.DataFrame({'Age': [25, 22, 30]})
print(df.sort_values(by='Age'))

Age
1 22
0 25
2 30


#38. df.sort_index()
Sorts object by labels (along an axis).

import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]}, index=[10, 5, 8])
print(df.sort_index())

A
5 2
8 3
10 1


#39. pd.cut()
Bins values into discrete intervals.

import pandas as pd
ages = pd.Series([22, 35, 58, 8, 42])
age_bins = pd.cut(ages, bins=[0, 18, 35, 60], labels=['Child', 'Adult', 'Senior'])
print(age_bins)

0     Adult
1 Adult
2 Senior
3 Child
4 Senior
dtype: category
Categories (3, object): ['Child' < 'Adult' < 'Senior']


#40. pd.qcut()
Quantile-based discretization function (bins into equal-sized groups).

import pandas as pd
data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
quartiles = pd.qcut(data, 4, labels=False)
print(quartiles)

0    0
1 0
2 0
3 1
4 1
5 2
6 2
7 3
8 3
9 3
dtype: int64


#41. s.str.contains()
Tests if a pattern or regex is contained within a string of a Series.

import pandas as pd
s = pd.Series(['apple', 'banana', 'apricot'])
print(s[s.str.contains('ap')])

0      apple
2 apricot
dtype: object


#42. s.str.split()
Splits strings around a given separator/delimiter.

import pandas as pd
s = pd.Series(['a_b', 'c_d'])
print(s.str.split('_', expand=True))

0  1
0 a b
1 c d


#43. s.str.lower()
Converts strings in the Series to lowercase.

import pandas as pd
s = pd.Series(['HELLO', 'World'])
print(s.str.lower())

0    hello
1 world
dtype: object


#44. s.str.strip()
Removes leading and trailing whitespace.

import pandas as pd
s = pd.Series([' hello ', ' world '])
print(s.str.strip())

0    hello
1 world
dtype: object


#45. s.dt.year
Extracts the year from a datetime Series.

import pandas as pd
s = pd.to_datetime(pd.Series(['2023-01-01', '2024-05-10']))
print(s.dt.year)

0    2023
1 2024
dtype: int64

---
#DataAnalysis #Pandas #Grouping #Aggregation

Part 4: Pandas - Grouping & Aggregation

#46. df.groupby()
Groups a DataFrame using a mapper or by a Series of columns.

import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
grouped = df.groupby('Team')
print(grouped)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x...>


#47. groupby.agg()
Aggregates using one or more operations over the specified axis.

import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
agg_df = df.groupby('Team').agg(['mean', 'sum'])
print(agg_df)

Points     
mean sum
Team
A 11 22
B 7 14


#48. groupby.size()
Computes group sizes.
1
import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B', 'A']})
print(df.groupby('Team').size())

Team
A 3
B 2
dtype: int64


#49. groupby.count()
Computes the count of non-NA cells for each group.

import pandas as pd
import numpy as np
df = pd.DataFrame({'Team': ['A', 'B', 'A'], 'Score': [1, np.nan, 3]})
print(df.groupby('Team').count())

Score
Team
A 2
B 0


#50. groupby.mean()
Computes the mean of group values.

import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').mean())

Points
Team
A 11
B 7


#51. groupby.sum()
Computes the sum of group values.

import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').sum())

Points
Team
A 22
B 14


#52. groupby.min()
Computes the minimum of group values.

import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').min())

Points
Team
A 10
B 6


#53. groupby.max()
Computes the maximum of group values.

import pandas as pd
df = pd.DataFrame({'Team': ['A', 'B', 'A', 'B'], 'Points': [10, 8, 12, 6]})
print(df.groupby('Team').max())

Points
Team
A 12
B 8


#54. df.pivot_table()
Creates a spreadsheet-style pivot table as a DataFrame.

import pandas as pd
df = pd.DataFrame({'A': ['foo', 'foo', 'bar'], 'B': ['one', 'two', 'one'], 'C': [1, 2, 3]})
pivot = df.pivot_table(values='C', index='A', columns='B')
print(pivot)

B    one  two
A
bar 3.0 NaN
foo 1.0 2.0


#55. pd.crosstab()
Computes a cross-tabulation of two (or more) factors.

import pandas as pd
df = pd.DataFrame({'A': ['foo', 'foo', 'bar'], 'B': ['one', 'two', 'one']})
crosstab = pd.crosstab(df.A, df.B)
print(crosstab)

B    one  two
A
bar 1 0
foo 1 1

---
#DataAnalysis #Pandas #Merging #Joining

Part 5: Pandas - Merging & Concatenating

#56. pd.merge()
Merges DataFrame or named Series objects with a database-style join.

import pandas as pd
df1 = pd.DataFrame({'key': ['A', 'B'], 'val1': [1, 2]})
df2 = pd.DataFrame({'key': ['A', 'B'], 'val2': [3, 4]})
merged = pd.merge(df1, df2, on='key')
print(merged)

key  val1  val2
0 A 1 3
1 B 2 4


#57. pd.concat()
Concatenates pandas objects along a particular axis.

import pandas as pd
df1 = pd.DataFrame({'A': [1, 2]})
df2 = pd.DataFrame({'A': [3, 4]})
concatenated = pd.concat([df1, df2])
print(concatenated)

A
0 1
1 2
0 3
1 4


#58. df.join()
Joins columns with other DataFrame(s) on index or on a key column.
2🎉1