Python Script to Extract Email Data from LinkedIn 2023 Database 12-30-2023, 01:32 PM
#1
Handy Script
Just replace `US|United States` in `country_pattern` with the desired country to extract only that country's emails.
For example, use `CA|Canada` to keep only rows that mention Canada.
Code:
import re
from tkinter import Tk, filedialog
# Patterns are compiled once at import time instead of on every row.
# The TLD class is [A-Za-z]; the previous [A-Z|a-z] also accepted a literal
# '|' character, so text such as "x@site.co|uk" was captured as one address.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
# Country filter: edit the alternatives here to target a different country.
_COUNTRY_RE = re.compile(r'\b(?:US|United States)\b')
# Name heuristic: the first pair of space-separated alphabetic words in the row.
_NAME_RE = re.compile(r'([A-Za-z]+ [A-Za-z]+)')


def extract_info(row):
    """Pull the first email, a name guess, and the country out of one row.

    Args:
        row: A single line of text from the input file.

    Returns:
        A tuple ``(email, name, country)`` when the row contains both a
        country match and at least one email address. ``email`` is the first
        address found in the row, ``name`` is the first two-word alphabetic
        sequence found anywhere in the row (or ``None`` if there is none),
        and ``country`` is the matched country text. Returns
        ``(None, None, None)`` when the row has no country match or no email.
    """
    # The country test does not depend on the email, so do it once up front.
    country_match = _COUNTRY_RE.search(row)
    if country_match is None:
        return None, None, None

    # Only the first address in the row is ever reported.
    email_match = _EMAIL_RE.search(row)
    if email_match is None:
        return None, None, None

    name_match = _NAME_RE.search(row)
    name = name_match.group(1) if name_match else None
    return email_match.group(), name, country_match.group()
def process_file(input_file_path, output_file_path):
    """Filter the input file line by line and write the matches to the output file.

    Each input line is passed to ``extract_info``; when it yields an email,
    one formatted line is written to the output file. Progress and errors are
    reported with ``print`` rather than raised.

    Args:
        input_file_path: Path of the text file to read.
        output_file_path: Path of the text file to create or overwrite.
    """
    try:
        # Explicit UTF-8 keeps behavior identical across platforms, and
        # errors='replace' stops a single undecodable byte from aborting the
        # run halfway and leaving a truncated output file.
        with open(input_file_path, 'r', encoding='utf-8', errors='replace') as input_file, \
                open(output_file_path, 'w', encoding='utf-8') as output_file:
            for line in input_file:
                email, name, country = extract_info(line)
                if email:
                    output_file.write(f"Email: {email}, Name: {name}, Country: {country}\n")
    except Exception as e:
        # Kept broad on purpose: this function reports failures, it never raises.
        print(f"Error processing file: {e}")
    else:
        # Reported only after both files have been closed successfully.
        print(f"Extraction completed. Results saved to {output_file_path}")
def browse_files():
    """Ask for an input and an output path via file dialogs, then process the file.

    Shows an "Open" dialog followed by a "Save as" dialog. If either dialog is
    cancelled (returns an empty value), nothing is processed.
    """
    # Keep a reference to the hidden root window so it can be destroyed once
    # the dialogs are done instead of lingering for the rest of the run.
    root = Tk()
    root.withdraw()  # No full GUI is wanted; only the dialogs should appear.
    try:
        input_file_path = filedialog.askopenfilename(title="Select an input file")
        if not input_file_path:
            return
        output_file_path = filedialog.asksaveasfilename(
            title="Select an output file",
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt")],
        )
    finally:
        root.destroy()

    if output_file_path:
        process_file(input_file_path, output_file_path)
# Script entry point: prompt for the input/output files and run the extraction.
if __name__ == "__main__":
    browse_files()