# Interview Power Move: Parallel Merging
from concurrent.futures import ThreadPoolExecutor
from PyPDF2 import PdfMerger
def parallel_merge(pdf_list, output, max_workers=4):
    """Merge the PDFs in ``pdf_list`` into ``output`` using worker threads.

    The inputs are split into contiguous slices, each slice is merged into
    an intermediate PDF by a worker thread, and the intermediate PDFs are
    then merged, in order, into ``output``. Intermediate files live in a
    private temporary directory that is removed before returning.

    Args:
        pdf_list: PDF paths, in the order they should appear in the result.
        output: Path of the merged PDF to write.
        max_workers: Upper bound on the number of worker threads and on the
            number of intermediate files.

    Raises:
        ValueError: If ``max_workers`` is less than 1.
    """
    import os
    import tempfile

    if max_workers < 1:
        raise ValueError("max_workers must be at least 1")

    pdf_list = list(pdf_list)
    # Contiguous slices keep the original document order; a strided split
    # (pdf_list[i::max_workers]) would interleave the inputs in the result.
    chunk_size = max(1, -(-len(pdf_list) // max_workers))  # ceiling division
    chunks = [
        pdf_list[start:start + chunk_size]
        for start in range(0, len(pdf_list), chunk_size)
    ]

    def merge_chunk(chunk, temp):
        """Merge one slice of inputs into the intermediate file ``temp``."""
        merger = PdfMerger()
        try:
            for pdf in chunk:
                merger.append(pdf)
            merger.write(temp)
        finally:
            merger.close()
        return temp

    with tempfile.TemporaryDirectory() as temp_dir:
        # Unique directory per call, so concurrent runs and pre-existing
        # temp_N.pdf files in the working directory cannot collide.
        temp_paths = [
            os.path.join(temp_dir, f"temp_{idx}.pdf")
            for idx in range(len(chunks))
        ]
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            temp_files = list(executor.map(merge_chunk, chunks, temp_paths))

        # Final merge of chunks
        final_merger = PdfMerger()
        try:
            for temp in temp_files:
                final_merger.append(temp)
            final_merger.write(output)
        finally:
            # Close before the temporary directory is removed so no
            # intermediate file is still open at cleanup time.
            final_merger.close()
# Example call. The literal `...` in the list is a placeholder for further
# paths and must be replaced with real file names before running.
parallel_merge(["doc1.pdf", "doc2.pdf", ...], "parallel_merge.pdf")
# Pro Tip: Validate PDFs before merging
from PyPDF2 import PdfReader
def is_valid_pdf(path):
    """Return True if ``path`` opens as a PDF with at least one page.

    Args:
        path: Filesystem path of the candidate PDF.

    Returns:
        ``True`` when the file can be read and reports one or more pages;
        ``False`` when it is missing, unreadable, unparsable or empty.
    """
    try:
        with open(path, "rb") as f:
            reader = PdfReader(f)
            return len(reader.pages) > 0
    except Exception:
        # Validation boundary: any read or parse failure means "not valid".
        # Unlike a bare ``except:``, this still lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
valid_pdfs = [f for f in pdf_files if is_valid_pdf(f)]
# Assuming `merger` is a PdfMerger: append() accepts one document per call,
# so the valid files are added one at a time instead of as a list.
for pdf in valid_pdfs:
    merger.append(pdf)  # Only merge valid files
# Real-World Case Study: Invoice Processing Pipeline
import glob
from PyPDF2 import PdfMerger
def process_monthly_invoices():
    """Download, validate, merge, encrypt and upload the monthly invoices.

    Steps: fetch the invoices over SFTP, keep the valid PDFs sorted by
    invoice date, merge them behind a cover page with one outline entry per
    invoice, attach an invoice-count metadata field, encrypt, write the
    result to a date-stamped file and upload that same file.
    """
    # Local import: datetime is used below but not imported by this snippet.
    from datetime import datetime

    # 1. Download invoices from SFTP
    download_invoices("sftp://vendor.com/invoices/*.pdf")

    # 2. Validate and sort
    invoices = sorted(
        (f for f in glob.glob("invoices/*.pdf") if is_valid_pdf(f)),
        key=extract_invoice_date,
    )

    # Computed once so the file that is written is the file that is uploaded.
    output_name = f"Q3_Invoices_{datetime.now().strftime('%Y%m')}.pdf"

    # 3. Merge with cover page
    merger = PdfMerger()
    try:
        merger.append("cover_template.pdf")
        for inv in invoices:
            merger.append(inv, outline_item=get_client_name(inv))

        # 4. Add metadata and encrypt
        merger.add_metadata({"/InvoiceCount": str(len(invoices))})
        # NOTE(review): the owner password is hard-coded; load it from a
        # secret store or environment variable before production use.
        # NOTE(review): confirm that the installed PyPDF2 exposes encrypt()
        # on PdfMerger with an ``owner_pwd`` keyword.
        merger.encrypt(owner_pwd="finance_team_2023")
        merger.write(output_name)
    finally:
        merger.close()

    # 5. Upload to secure storage
    # Assumes the second argument is the local file to upload -- TODO confirm
    # against upload_to_s3's signature.
    upload_to_s3("secure-bucket/processed/", output_name)
# Run the invoice pipeline once.
process_monthly_invoices()
By: https://t.iss.one/DataScience4
#Python #PDFProcessing #DocumentAutomation #PyPDF2 #CodingInterview #BackendDevelopment #FileHandling #DataEngineering #TechJobs #Programming #SystemDesign #DeveloperTips #CareerGrowth #CloudComputing #Docker #Microservices #Productivity #TechTips #Python3 #SoftwareEngineering
Forwarded from Machine Learning
In Python, building AI-powered Telegram bots unlocks massive potential for image generation, processing, and automation — master this to create viral tools and ace full-stack interviews! 🤖
Learn more: https://hackmd.io/@husseinsheikho/building-AI-powered-Telegram-bots
https://t.iss.one/DataScienceM 🦾
# Basic Bot Setup - The foundation (PTB v20+ Async)
from telegram.ext import Application, CommandHandler, MessageHandler, filters
async def start(update, context):
    """Reply to /start with the bot's greeting and its command overview.

    Args:
        update: Incoming update; the reply is sent via ``update.message``.
        context: Handler context (unused).
    """
    # The leading emoji was mis-encoded ("β¨") in the original literal.
    greeting = "\n".join(
        [
            "✨ AI Image Bot Active!",
            "/generate - Create images from text",
            "/enhance - Improve photo quality",
            "/help - Full command list",
        ]
    )
    await update.message.reply_text(greeting)
# Build the application from the bot token and register the /start handler.
app = Application.builder().token("YOUR_BOT_TOKEN").build()
app.add_handler(CommandHandler("start", start))
# NOTE(review): run_polling() presumably blocks until the bot is stopped, so
# every handler must be registered before this call -- confirm.
app.run_polling()
# Image Generation - DALL-E Integration (OpenAI)
import asyncio
import os

import openai
from telegram import Update
from telegram.ext import ContextTypes
from telegram.helpers import escape_markdown
# Take the OpenAI API key from the OPENAI_API_KEY environment variable
# rather than hard-coding it; os.getenv returns None when it is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")
async def generate(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /generate: create an image from the prompt and reply with it.

    Replies with a usage hint when no words follow the command. If the image
    request or the photo reply fails, the error text is sent back instead.

    Args:
        update: Incoming update; replies are sent via ``update.message``.
        context: Handler context; ``context.args`` holds the prompt words.
    """
    if not context.args:
        await update.message.reply_text("❌ Usage: /generate cute robot astronaut")
        return

    prompt = " ".join(context.args)
    try:
        # openai.Image.create is a synchronous call; run it in a worker
        # thread so the event loop is not blocked while the image renders.
        response = await asyncio.to_thread(
            openai.Image.create,
            prompt=prompt,
            n=1,
            size="1024x1024",
        )
        await update.message.reply_photo(
            photo=response["data"][0]["url"],
            # Escape the user's text so characters such as * or _ in the
            # prompt cannot break Markdown parsing of the caption.
            caption=f"🎨 Generated: *{escape_markdown(prompt)}*",
            parse_mode="Markdown",
        )
    except Exception as e:
        # Handler boundary: report the failure to the user instead of
        # letting the update fail silently.
        await update.message.reply_text(f"💥 Error: {e}")
# Route the /generate command to the generate() handler.
app.add_handler(CommandHandler("generate", generate))
Learn more: https://hackmd.io/@husseinsheikho/building-AI-powered-Telegram-bots
#Python #TelegramBot #AI #ImageGeneration #StableDiffusion #OpenAI #MachineLearning #CodingInterview #FullStack #Chatbots #DeepLearning #ComputerVision #Programming #TechJobs #DeveloperTips #CareerGrowth #CloudComputing #Docker #APIs #Python3 #Productivity #TechTips
https://t.iss.one/DataScienceM
Please open Telegram to view this post
VIEW IN TELEGRAM
β€1
Advice for Python, UV, and Docker 🐍🐳
Sometimes dependencies are better installed separately from the code — this noticeably speeds up Docker image builds.
The idea is simple: first, we install dependencies, then we add the project.
Why is this necessary:
• Docker caches layers and does not rebuild them unnecessarily
• if only the code changes — the dependencies are taken from the cache
• if the dependencies change — only the dependency layer and the layers after it are rebuilt
• without this, any minor change triggers a full reinstallation
Example:
#Python #Docker #DevOps #UV #SoftwareEngineering #TechTips
β¨ Join Best TG Channels https://t.iss.one/addlist/0f6vfFbEMdAwODBk
βοΈ Join Our WhatsApp Channel https://whatsapp.com/channel/0029VaC7Weq29753hpcggW2A
π Level up your AI & Data Science skills with HelloEncyclo β a growing all-in-one platform featuring hands-on courses in LLMs, Deep Learning, MLOps, Data Engineering, and more.
β 13 courses live + 40+ coming soon
π― One access, lifetime updates
π Use code: PRESALE-BOOK-WAVE-2GFG
π https://helloencyclo.com/?ref=HUSSEINSHEIKHO
Sometimes dependencies are better installed separately from the code — this noticeably speeds up Docker image builds.
The idea is simple: first, we install dependencies, then we add the project.
Why is this necessary:
• Docker caches layers and does not rebuild them unnecessarily
• if only the code changes — the dependencies are taken from the cache
• if the dependencies change — only the dependency layer and the layers after it are rebuilt
• without this, any minor change triggers a full reinstallation
Example:
# Install only the locked dependencies first, so this layer can be reused
# from cache while uv.lock and pyproject.toml are unchanged.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --locked --no-install-project
# Add the project sources, then sync again to install the project itself.
COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked
#Python #Docker #DevOps #UV #SoftwareEngineering #TechTips
β¨ Join Best TG Channels https://t.iss.one/addlist/0f6vfFbEMdAwODBk
βοΈ Join Our WhatsApp Channel https://whatsapp.com/channel/0029VaC7Weq29753hpcggW2A
π Level up your AI & Data Science skills with HelloEncyclo β a growing all-in-one platform featuring hands-on courses in LLMs, Deep Learning, MLOps, Data Engineering, and more.
β 13 courses live + 40+ coming soon
π― One access, lifetime updates
π Use code: PRESALE-BOOK-WAVE-2GFG
π https://helloencyclo.com/?ref=HUSSEINSHEIKHO
β€4