Files
linkedin-bot/index.ts

625 lines
23 KiB
TypeScript

import { firefox } from 'playwright';
import { existsSync, mkdirSync } from 'fs';
import * as dotenv from 'dotenv';
import pino from 'pino';
import { Page } from 'playwright';
import { Locator } from 'playwright';
dotenv.config();
import path from 'path';
// Shared logger for the whole script; output goes through the pino-pretty
// transport with colorized output enabled.
const logger = pino({
  transport: { target: 'pino-pretty', options: { colorize: true } },
});

// File used to persist and reload the Playwright storage state between runs.
const STORAGE_PATH = 'linkedin_state.json';
/**
 * One profile collected from the LinkedIn people-search result cards.
 */
export interface Connection {
  /** Person's name. */
  name: string;
  /** Base profile URL without query params. */
  profileUrl: string;
  /** Number of mutual connections. */
  mutualConnections: number;
  /** Location, if available. */
  location?: string;
  /** Professional headline, if available. */
  headline?: string;
}
/**
 * Entry point of the bot.
 *
 * Flow:
 *  1. If no saved storage state exists, read LINKEDIN_EMAIL / LINKEDIN_PASSWORD
 *     from the environment (exits with code 1 when either is missing).
 *  2. Launch headless Firefox and create a context, reusing the saved storage
 *     state when present.
 *  3. Log in through the login form (fresh session) or open the feed (saved session).
 *  4. Take a screenshot of the search page, scrape the search results, and send
 *     connection requests to at most 25 profiles with the fewest mutual connections.
 *
 * The browser is always closed, even when a step throws.
 */
const main = async () => {
  logger.info('Starting LinkedIn bot with Firefox on Linux...');

  const MAX_TARGETS = 25;
  const contextOptions = {
    userAgent: 'Mozilla/5.0 (X11; Linux x86_64; rv:117.0) Gecko/20100101 Firefox/117.0',
    viewport: { width: 1280, height: 720 },
  };

  // Resolve credentials BEFORE launching the browser so that a missing .env
  // entry cannot leave a browser process behind when we exit.
  const hasSavedSession = existsSync(STORAGE_PATH);
  let credentials: { username: string; password: string } | undefined;
  if (!hasSavedSession) {
    const username = process.env.LINKEDIN_EMAIL;
    const password = process.env.LINKEDIN_PASSWORD;
    if (!username || !password) {
      logger.error('Missing LinkedIn credentials in .env file');
      process.exit(1);
    }
    credentials = { username, password };
  }

  const browser = await firefox.launch({
    headless: true,
    firefoxUserPrefs: {
      'dom.webdriver.enabled': false,
      'media.peerconnection.enabled': false,
      'useAutomationExtension': false,
    },
  });

  try {
    const context = await browser.newContext(
      hasSavedSession ? { ...contextOptions, storageState: STORAGE_PATH } : contextOptions,
    );
    const page = await context.newPage();

    if (credentials) {
      const { username, password } = credentials;

      logger.info('Navigating to LinkedIn login page...');
      await page.goto('https://www.linkedin.com/login');
      await page.screenshot({ path: '01-login-page.png' });

      // Best effort: dismiss the first popup that matches one of these selectors.
      try {
        const closeSelectors = [
          'button[aria-label="Dismiss"]',
          'button[aria-label="Close"]',
          'button[data-test-modal-close-button]',
          '.artdeco-modal__dismiss',
        ];
        for (const selector of closeSelectors) {
          const btn = await page.$(selector);
          if (btn) {
            await btn.click();
            await page.waitForTimeout(1000);
            break;
          }
        }
      } catch (err) {
        // pino only serializes the error when it is passed in the merging object;
        // a trailing argument without a format placeholder is dropped.
        logger.warn({ err }, 'Popup check failed');
      }

      // Fill in the e-mail field (click, pause, click again, then type with a per-key delay).
      const usernameSelector = 'input[name="session_key"]';
      await page.waitForSelector(usernameSelector, { state: 'visible', timeout: 15000 });
      await page.click(usernameSelector);
      await page.waitForTimeout(2000);
      await page.click(usernameSelector);
      await page.type(usernameSelector, username, { delay: 100 });

      // Fill in the password field the same way.
      const passwordSelector = 'input[name="session_password"]';
      await page.waitForSelector(passwordSelector, { state: 'visible', timeout: 15000 });
      await page.click(passwordSelector);
      await page.waitForTimeout(2000);
      await page.click(passwordSelector);
      await page.type(passwordSelector, password, { delay: 100 });
      await page.screenshot({ path: '02-filled-credentials.png' });

      const submitSelector = 'button[type="submit"]';
      await page.waitForSelector(submitSelector, { state: 'visible', timeout: 15000 });
      await page.click(submitSelector);
      await page.screenshot({ path: '03-after-submit.png' });

      logger.info('Waiting for feed page...');
      await page.waitForURL('**/feed*', { timeout: 30000, waitUntil: 'networkidle' });

      // Persist the session only AFTER the feed was reached. Saving right after
      // the submit click could store an unauthenticated state, which every later
      // run would then reuse because the file exists.
      await context.storageState({ path: STORAGE_PATH });
      await page.screenshot({ path: '04-feed.png' });
    } else {
      logger.info('Using saved session.');
      await page.goto('https://www.linkedin.com/feed/');
      await page.screenshot({ path: 'feed-page-reuse-session.png' });
    }

    const page_conn_test = await context.newPage();
    await openConnectionsAndScreenshot(page_conn_test);

    const page_connections_secondary = await context.newPage();
    const connections = await scrapeConnections(page_connections_secondary);
    logger.info(`Total connections found: ${connections.length}`);

    // Sort a copy by mutual connections (ascending) and keep the first MAX_TARGETS.
    const sortedConnections = [...connections]
      .sort((a, b) => a.mutualConnections - b.mutualConnections)
      .slice(0, MAX_TARGETS);
    logger.info(
      `Selected ${sortedConnections.length} connections (max ${MAX_TARGETS}) with least mutual connections:`,
    );
    sortedConnections.forEach((conn, idx) => {
      logger.info(`${idx + 1}. ${conn.name} (${conn.mutualConnections} mutual connections)`);
    });

    const page_connection = await context.newPage();
    await connectToAllProfiles(page_connection, sortedConnections);

    logger.info('Waiting 3 seconds before finishing...');
    await page.waitForTimeout(3000);
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
  logger.info('Browser closed, done.');
};

// Report failures instead of leaving an unhandled promise rejection.
// process.exitCode (rather than process.exit) lets pending log output finish.
main().catch((err: unknown) => {
  logger.error({ err }, 'LinkedIn bot failed');
  process.exitCode = 1;
});
/**
 * Opens a fixed LinkedIn people-search URL (page=2 of the `network=["S"]`
 * filter), waits five seconds for dynamic content, and stores a full-page
 * screenshot named `linkedin_connections_<ISO timestamp>.png` in the current
 * working directory.
 *
 * @param page Playwright page used for the navigation and the screenshot.
 */
export async function openConnectionsAndScreenshot(page: Page) {
  const searchUrl =
    'https://www.linkedin.com/search/results/people/?network=%5B%22S%22%5D&origin=MEMBER_PROFILE_CANNED_SEARCH&page=2';

  logger.info('Opening LinkedIn connections page...');

  // 'domcontentloaded' returns as soon as the DOM is parsed; the fixed pause
  // afterwards gives the page's scripts time to render the results.
  await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
  await page.waitForTimeout(5000);

  // ':' and '.' from the ISO timestamp are replaced so the name is file-system friendly.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  const screenshotPath = `linkedin_connections_${stamp}.png`;

  await page.screenshot({ path: screenshotPath, fullPage: true });
  logger.info(`📸 Screenshot saved to ${screenshotPath}`);
}
/**
 * Walks the paginated LinkedIn people-search results and collects one
 * {@link Connection} per result card that does not show a "Message" button.
 *
 * For every page a full-page screenshot is written to `pagination/`.
 * Pagination stops when no `.linked-area` element appears within 10 seconds,
 * when a page yields zero cards, or after 10 pages.
 *
 * Profile URLs are resolved against the current page URL (so relative hrefs
 * become absolute and can be navigated to later), stripped of query string and
 * fragment, and de-duplicated across all pages.
 *
 * @param page Playwright page used for navigation; it ends up on the last visited results page.
 * @returns All collected profiles, in the order they were encountered.
 */
export async function scrapeConnections(page: Page): Promise<Connection[]> {
  const MAX_PAGES = 10;
  // Hoisted so the pattern is not rebuilt for every candidate element.
  const STATE_CODE_PATTERN = /\b[A-Z]{2}\b/;
  const messageButtonSelectors = [
    'button[aria-label*="Message"]',
    'button:has-text("Message")',
    '.artdeco-button:has-text("Message")',
  ];
  const nameSelectors = [
    'span[dir="ltr"] span[aria-hidden="true"]',
    'a[href*="/in/"] span[aria-hidden="true"]',
    'a[data-test-app-aware-link] span[aria-hidden="true"]',
    '.artdeco-entity-lockup__title a span[aria-hidden="true"]',
    'span.ZGqwDIzKYyWZGPNHFVsMdJIrNpzbSChPdgBEBE span[aria-hidden="true"]',
  ];

  logger.info('📸 Opening LinkedIn connections page...');

  // Create pagination directory if it doesn't exist
  if (!existsSync('pagination')) {
    mkdirSync('pagination');
  }

  const allConnections: Connection[] = [];
  // Profile URLs already collected, so the same person is never returned twice.
  const seenProfileUrls = new Set<string>();
  let currentPage = 1;

  while (true) {
    const url = `https://www.linkedin.com/search/results/people/?network=%5B%22S%22%5D&origin=FACETED_SEARCH&page=${currentPage}`;
    logger.info(`🌐 Navigating to page ${currentPage}: ${url}`);
    await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: 45000,
    });
    await page.waitForTimeout(3000);

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const screenshotPath = `pagination/page_${currentPage}_${timestamp}.png`;
    await page.screenshot({ path: screenshotPath, fullPage: true });
    logger.info(`📸 Saved screenshot to ${screenshotPath}`);

    // A page without any result card is treated as the end of the results.
    try {
      await page.waitForSelector('.linked-area', { timeout: 10000 });
    } catch {
      logger.warn('No .linked-area found on page, might be end of results');
      break;
    }

    const cards = await page.$$('.linked-area');
    const pageConnections: Connection[] = [];
    logger.info(`Found ${cards.length} connection cards on page ${currentPage}`);

    for (const [index, card] of cards.entries()) {
      const label = `Card ${index + 1}`;
      try {
        logger.info(`Processing card ${index + 1}...`);

        // A "Message" button on the card is taken to mean "already connected".
        let hasMessageButton = false;
        for (const selector of messageButtonSelectors) {
          const messageButton = await card.$(selector);
          if (messageButton) {
            hasMessageButton = true;
            const ariaLabel = await messageButton.getAttribute('aria-label');
            logger.info(`${label}: Found message button with aria-label: "${ariaLabel}"`);
            break;
          }
        }
        if (hasMessageButton) {
          logger.info(`${label}: Skipping profile - already connected (has Message button)`);
          continue;
        }

        // The profile link is an anchor whose href contains "/in/".
        const profileLink = await card.$('a[href*="/in/"]');
        if (!profileLink) {
          logger.info(`${label}: No profile link found`);
          continue;
        }
        const rawUrl = await profileLink.getAttribute('href');
        if (!rawUrl) {
          logger.info(`${label}: No href attribute found`);
          continue;
        }

        // Resolve relative hrefs against the current page and drop query/fragment.
        const resolvedUrl = new URL(rawUrl, page.url());
        const cleanUrl = `${resolvedUrl.origin}${resolvedUrl.pathname}`;
        logger.info(`${label}: Profile URL = ${cleanUrl}`);

        if (seenProfileUrls.has(cleanUrl)) {
          logger.info(`${label}: Skipping profile - already collected`);
          continue;
        }
        seenProfileUrls.add(cleanUrl);

        // Name: first selector that yields non-empty text wins.
        let name = 'Unknown';
        for (const selector of nameSelectors) {
          const nameElement = await card.$(selector);
          if (!nameElement) {
            continue;
          }
          const extractedName = (await nameElement.textContent())?.trim();
          if (extractedName) {
            name = extractedName;
            logger.info(`${label}: Found name with selector "${selector}": ${name}`);
            break;
          }
        }
        if (name === 'Unknown') {
          // Fallback: use the text of any profile link that is not a "View"/"mutual" link.
          const allProfileLinks = await card.$$('a[href*="/in/"]');
          for (const link of allProfileLinks) {
            const linkText = (await link.textContent())?.trim();
            if (linkText && !linkText.includes('View') && !linkText.includes('mutual')) {
              name = linkText;
              logger.info(`${label}: Found name from link text: ${name}`);
              break;
            }
          }
        }
        logger.info(`${label}: Name = ${name}`);

        // Location heuristic: first candidate containing a comma or a two-capital-letter word.
        const locationElements = await card.$$('div.t-14.t-normal');
        let location = '';
        for (const locEl of locationElements) {
          const text = (await locEl.textContent())?.trim() || '';
          if (text && (text.includes(',') || STATE_CODE_PATTERN.test(text))) {
            location = text;
            break;
          }
        }
        logger.info(`${label}: Location = ${location || 'Not found'}`);

        const headlineElement = await card.$('div.t-14.t-black.t-normal');
        const headline = headlineElement ? (await headlineElement.textContent())?.trim() || '' : '';
        logger.info(`${label}: Headline = ${headline || 'Not found'}`);

        // Mutual connections: first run of digits inside the "facetConnectionOf" link, else 0.
        let mutualConnections = 0;
        try {
          const mutualElement = await card.$('a[href*="facetConnectionOf"] strong');
          if (mutualElement) {
            const mutualText = (await mutualElement.textContent())?.trim() || '';
            const match = mutualText.match(/(\d+)/);
            if (match) {
              mutualConnections = Number.parseInt(match[1], 10);
            }
            logger.info(`${label}: Mutual connections text = "${mutualText}", parsed = ${mutualConnections}`);
          } else {
            logger.info(`${label}: No mutual connections element found`);
          }
        } catch (err) {
          logger.info({ err }, `${label}: Error extracting mutual connections`);
        }

        // Cards without a readable name are still kept (the URL is valid) under
        // a positional placeholder name.
        const hasName = name !== 'Unknown';
        const connection: Connection = {
          name: hasName ? name : `Profile_${index + 1}`,
          profileUrl: cleanUrl,
          mutualConnections,
          location: location || undefined,
          headline: headline || undefined,
        };
        // pino serializes objects only when they are passed as the first argument.
        logger.info(
          { connection },
          hasName ? `${label}: Final connection object` : `${label}: Adding with fallback name`,
        );
        pageConnections.push(connection);
      } catch (err) {
        logger.warn({ err }, `Error processing card ${index + 1}`);
      }
    }

    logger.info(`📥 Page ${currentPage}: Collected ${pageConnections.length} profiles (after filtering out already connected).`);
    allConnections.push(...pageConnections);

    // Only stop if there are no profile cards at all (true end of results).
    if (cards.length === 0) {
      logger.info('No profile cards found on this page, reached end of results');
      break;
    }

    currentPage++;
    // Pause between result pages.
    await page.waitForTimeout(2000);

    // Hard cap so pagination can never loop forever.
    if (currentPage > MAX_PAGES) {
      logger.warn(`Reached maximum page limit (${MAX_PAGES}), stopping pagination`);
      break;
    }
  }

  logger.info(`Total connections scraped: ${allConnections.length} (after filtering out already connected profiles)`);
  return allConnections;
}
/**
 * Returns the first element matching one of `selectors` that is visible after
 * being scrolled into view, together with the selector that matched, or `null`
 * when none qualifies. A selector that throws is skipped.
 */
async function findVisibleConnectButton(page: Page, selectors: readonly string[]) {
  for (const selector of selectors) {
    try {
      const handle = await page.$(selector);
      if (!handle) {
        continue;
      }
      await handle.scrollIntoViewIfNeeded();
      await page.waitForTimeout(500);
      if (await handle.isVisible()) {
        logger.info(`Found connect button with selector: ${selector}`);
        return { handle, selector };
      }
      logger.info(`Found connect button with selector: ${selector} but not visible, trying next`);
    } catch (err) {
      // A failing selector must not leave a stale candidate behind; just try the next one.
      logger.debug({ err }, `Connect button lookup failed for selector "${selector}", trying next`);
    }
  }
  return null;
}

/**
 * Opens a profile and tries to send a connection request without a note.
 *
 * Steps: navigate to `connection.profileUrl`, locate a visible and enabled
 * Connect button, click it, and then either
 *  - click "Send without a note" in the invitation modal and wait for the
 *    modal to be removed, or
 *  - when no modal shows up, treat the request as sent if the button that was
 *    clicked no longer matches its selector or now reads "Pending"/"Sent".
 *
 * @param page Playwright page to drive; it is left on the profile page.
 * @param connection Profile to connect to.
 * @returns `true` when the request appears to have been sent, `false`
 *          otherwise. Errors are logged and reported as `false`; this function
 *          does not throw.
 */
export async function connectToProfile(page: Page, connection: Connection): Promise<boolean> {
  logger.info(`Attempting to connect to ${connection.name} at ${connection.profileUrl}`);
  try {
    await page.goto(connection.profileUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });
    await page.waitForTimeout(3000);

    // Scroll a third of the way down to bring the action buttons towards the viewport.
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight / 3));
    await page.waitForTimeout(1000);

    const connectButtonSelectors = [
      'button[aria-label*="Invite"][aria-label*="to connect"]',
      'button:has-text("Connect")',
      '.artdeco-button--primary:has-text("Connect")',
      'button.artdeco-button--primary[aria-label*="connect"]',
      'button.artdeco-button:has-text("Connect")',
    ];

    const match = await findVisibleConnectButton(page, connectButtonSelectors);
    if (!match) {
      logger.warn(`No connect button found for ${connection.name}`);
      return false;
    }
    const { handle: connectButton, selector: matchedSelector } = match;

    const isVisible = await connectButton.isVisible();
    const isEnabled = await connectButton.isEnabled();
    if (!isVisible || !isEnabled) {
      logger.warn(`Connect button not clickable for ${connection.name} (visible: ${isVisible}, enabled: ${isEnabled})`);
      return false;
    }

    logger.info(`Clicking connect button for ${connection.name}`);
    await connectButton.click();

    try {
      const modalSelector = '[data-test-modal][role="dialog"]';
      // Wait for the modal instead of sleeping a fixed time: returns as soon as
      // it is attached, and a timeout simply means "no modal".
      const modal = await page
        .waitForSelector(modalSelector, { state: 'attached', timeout: 3000 })
        .catch(() => null);

      if (!modal) {
        logger.info(`No modal appeared for ${connection.name}, connection may have been sent directly`);
        await page.waitForTimeout(1000);
        // Re-check with the selector that actually matched the clicked button.
        // Checking a different selector would find nothing and report success
        // even when the button is unchanged.
        const updatedConnectButton = await page.$(matchedSelector);
        if (!updatedConnectButton) {
          logger.info(`Connect button disappeared for ${connection.name}, likely successful`);
          return true;
        }
        const buttonText = await updatedConnectButton.textContent();
        if (buttonText && (buttonText.includes('Pending') || buttonText.includes('Sent'))) {
          logger.info(`Connect button changed to "${buttonText}" for ${connection.name}, successful`);
          return true;
        }
        logger.warn(`Connect button still present with text "${buttonText}" for ${connection.name}`);
        return false;
      }

      logger.info(`Invitation modal appeared for ${connection.name}`);
      const sendWithoutNoteSelectors = [
        'button[aria-label="Send without a note"]',
        'button:has-text("Send without a note")',
        '.artdeco-button--primary:has-text("Send without a note")',
      ];
      let sendButton = null;
      for (const selector of sendWithoutNoteSelectors) {
        try {
          sendButton = await modal.$(selector);
        } catch (err) {
          logger.debug({ err }, `Send button lookup failed for selector "${selector}", trying next`);
          continue;
        }
        if (sendButton) {
          logger.info(`Found "Send without a note" button with selector: ${selector}`);
          break;
        }
      }
      if (!sendButton) {
        logger.warn(`Could not find "Send without a note" button for ${connection.name}`);
        return false;
      }

      const isButtonVisible = await sendButton.isVisible();
      const isButtonEnabled = await sendButton.isEnabled();
      if (!isButtonVisible || !isButtonEnabled) {
        logger.warn(`Send button not clickable for ${connection.name} (visible: ${isButtonVisible}, enabled: ${isButtonEnabled})`);
        return false;
      }

      logger.info(`Clicking "Send without a note" for ${connection.name}`);
      await sendButton.click();

      // The modal being removed from the DOM is the success indicator.
      const modalClosed = await page
        .waitForSelector(modalSelector, { state: 'detached', timeout: 5000 })
        .then(() => true)
        .catch(() => false);
      if (modalClosed) {
        logger.info(`Successfully sent connection request to ${connection.name}`);
        return true;
      }
      logger.warn(`Modal still exists after clicking send for ${connection.name}`);
      return false;
    } catch (err) {
      logger.error({ err }, `Error handling modal for ${connection.name}`);
      return false;
    }
  } catch (err) {
    logger.error({ err }, `Error connecting to ${connection.name}`);
    return false;
  }
}
/**
 * Sends connection requests to every profile in `connections`, one after the
 * other, and logs progress (every 10 profiles) and a final summary.
 *
 * A random 2-5 second pause is inserted between attempts and a 5 second pause
 * after an unexpected error; no pause follows the last profile. If the page
 * has been closed, the loop stops early because no further attempt can succeed.
 *
 * @param page Playwright page reused for all profile visits.
 * @param connections Profiles to process, in order. May be empty.
 */
export async function connectToAllProfiles(page: Page, connections: Connection[]): Promise<void> {
  const total = connections.length;
  logger.info(`Starting connection process for ${total} profiles`);

  let successCount = 0;
  let failureCount = 0;
  let errorCount = 0;
  const failedConnections: Connection[] = [];
  const errorConnections: { connection: Connection; error: string }[] = [];

  // Percentage with one decimal; guards the empty case so it never prints "NaN".
  const formatRate = (successes: number, processed: number): string =>
    processed === 0 ? '0.0' : ((successes / processed) * 100).toFixed(1);

  for (const [i, connection] of connections.entries()) {
    const isLast = i === total - 1;
    logger.info(`\n=== Processing ${i + 1}/${total}: ${connection.name} (${connection.mutualConnections} mutual connections) ===`);
    try {
      const success = await connectToProfile(page, connection);
      if (success) {
        successCount++;
        logger.info(`✅ SUCCESS: Connected to ${connection.name} (${successCount} total successes)`);
      } else {
        failureCount++;
        failedConnections.push(connection);
        logger.warn(`❌ FAILED: Could not connect to ${connection.name} (${failureCount} total failures)`);
      }
      if (!isLast) {
        // Random 2-5 second pause between attempts.
        const delayMs = Math.floor(Math.random() * 3000) + 2000;
        logger.info(`⏳ Waiting ${delayMs}ms before next connection attempt...`);
        await page.waitForTimeout(delayMs);
      }
    } catch (error) {
      errorCount++;
      const errorMessage = error instanceof Error ? error.message : String(error);
      errorConnections.push({ connection, error: errorMessage });
      logger.error(`💥 ERROR processing ${connection.name}: ${errorMessage}`);

      // Waiting on a closed page would throw again and skip the summary below.
      if (page.isClosed()) {
        logger.error('Page is closed, aborting remaining connection attempts');
        break;
      }
      if (!isLast) {
        logger.info(`⏳ Waiting 5 seconds after error before continuing...`);
        await page.waitForTimeout(5000);
      }
    }

    // Progress update every 10 connections
    if ((i + 1) % 10 === 0) {
      logger.info(`\n📊 PROGRESS UPDATE (${i + 1}/${total}):`);
      logger.info(` ✅ Successes: ${successCount}`);
      logger.info(` ❌ Failures: ${failureCount}`);
      logger.info(` 💥 Errors: ${errorCount}`);
      logger.info(` 📈 Success Rate: ${formatRate(successCount, i + 1)}%`);
    }
  }

  // Final summary
  logger.info(`\n🏁 FINAL RESULTS:`);
  logger.info(` Total Processed: ${total}`);
  logger.info(` ✅ Successful Connections: ${successCount}`);
  logger.info(` ❌ Failed Connections: ${failureCount}`);
  logger.info(` 💥 Errors: ${errorCount}`);
  logger.info(` 📈 Overall Success Rate: ${formatRate(successCount, total)}%`);

  // Log failed connections for review
  if (failedConnections.length > 0) {
    logger.warn(`\n❌ FAILED CONNECTIONS (${failedConnections.length}):`);
    failedConnections.forEach((conn, idx) => {
      logger.warn(` ${idx + 1}. ${conn.name} - ${conn.profileUrl} (${conn.mutualConnections} mutual)`);
    });
  }

  // Log error connections for debugging
  if (errorConnections.length > 0) {
    logger.error(`\n💥 ERROR CONNECTIONS (${errorConnections.length}):`);
    errorConnections.forEach((item, idx) => {
      logger.error(` ${idx + 1}. ${item.connection.name} - Error: ${item.error}`);
    });
  }

  logger.info(`\n🎉 Connection process completed!`);
}