Files
stonkNewsYahoo/main.py
2025-02-06 20:36:35 -06:00

581 lines
8.2 KiB
Python

from fastapi import FastAPI
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import redis
from time import sleep
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
from transformers import pipeline
# Build the FinBERT text-classification pipeline once, at import time.
# fetch_news() calls it on each article description and reads the "label"
# of the first result to decide whether the article is published.
pipe = pipeline("text-classification", model="ProsusAI/finbert")
# Ticker symbols whose Yahoo Finance news pages are polled, ten per row.
# NOTE(review): this looks like the S&P 500 constituent list — confirm the
# source before adding or removing symbols.
tickers = [
    "AOS", "ABT", "ABBV", "ACN", "ADBE", "AMD", "AES", "AFL", "A", "APD",
    "ABNB", "AKAM", "ALB", "ARE", "ALGN", "ALLE", "LNT", "ALL", "GOOGL", "GOOG",
    "MO", "AMZN", "AMCR", "AEE", "AEP", "AXP", "AIG", "AMT", "AWK", "AMP",
    "AME", "AMGN", "APH", "ADI", "ANSS", "AON", "APA", "APO", "AAPL", "AMAT",
    "APTV", "ACGL", "ADM", "ANET", "AJG", "AIZ", "T", "ATO", "ADSK", "ADP",
    "AZO", "AVB", "AVY", "AXON", "BKR", "BALL", "BAC", "BAX", "BDX", "BRK.B",
    "BBY", "TECH", "BIIB", "BLK", "BX", "BK", "BA", "BKNG", "BWA", "BSX",
    "BMY", "AVGO", "BR", "BRO", "BF.B", "BLDR", "BG", "BXP", "CHRW", "CDNS",
    "CZR", "CPT", "CPB", "COF", "CAH", "KMX", "CCL", "CARR", "CAT", "CBOE",
    "CBRE", "CDW", "CE", "COR", "CNC", "CNP", "CF", "CRL", "SCHW", "CHTR",
    "CVX", "CMG", "CB", "CHD", "CI", "CINF", "CTAS", "CSCO", "C", "CFG",
    "CLX", "CME", "CMS", "KO", "CTSH", "CL", "CMCSA", "CAG", "COP", "ED",
    "STZ", "CEG", "COO", "CPRT", "GLW", "CPAY", "CTVA", "CSGP", "COST", "CTRA",
    "CRWD", "CCI", "CSX", "CMI", "CVS", "DHR", "DRI", "DVA", "DAY", "DECK",
    "DE", "DELL", "DAL", "DVN", "DXCM", "FANG", "DLR", "DFS", "DG", "DLTR",
    "D", "DPZ", "DOV", "DOW", "DHI", "DTE", "DUK", "DD", "EMN", "ETN",
    "EBAY", "ECL", "EIX", "EW", "EA", "ELV", "EMR", "ENPH", "ETR", "EOG",
    "EPAM", "EQT", "EFX", "EQIX", "EQR", "ERIE", "ESS", "EL", "EG", "EVRG",
    "ES", "EXC", "EXPE", "EXPD", "EXR", "XOM", "FFIV", "FDS", "FICO", "FAST",
    "FRT", "FDX", "FIS", "FITB", "FSLR", "FE", "FI", "FMC", "F", "FTNT",
    "FTV", "FOXA", "FOX", "BEN", "FCX", "GRMN", "IT", "GE", "GEHC", "GEV",
    "GEN", "GNRC", "GD", "GIS", "GM", "GPC", "GILD", "GPN", "GL", "GDDY",
    "GS", "HAL", "HIG", "HAS", "HCA", "DOC", "HSIC", "HSY", "HES", "HPE",
    "HLT", "HOLX", "HD", "HON", "HRL", "HST", "HWM", "HPQ", "HUBB", "HUM",
    "HBAN", "HII", "IBM", "IEX", "IDXX", "ITW", "INCY", "IR", "PODD", "INTC",
    "ICE", "IFF", "IP", "IPG", "INTU", "ISRG", "IVZ", "INVH", "IQV", "IRM",
    "JBHT", "JBL", "JKHY", "J", "JNJ", "JCI", "JPM", "JNPR", "K", "KVUE",
    "KDP", "KEY", "KEYS", "KMB", "KIM", "KMI", "KKR", "KLAC", "KHC", "KR",
    "LHX", "LH", "LRCX", "LW", "LVS", "LDOS", "LEN", "LII", "LLY", "LIN",
    "LYV", "LKQ", "LMT", "L", "LOW", "LULU", "LYB", "MTB", "MPC", "MKTX",
    "MAR", "MMC", "MLM", "MAS", "MA", "MTCH", "MKC", "MCD", "MCK", "MDT",
    "MRK", "META", "MET", "MTD", "MGM", "MCHP", "MU", "MSFT", "MAA", "MRNA",
    "MHK", "MOH", "TAP", "MDLZ", "MPWR", "MNST", "MCO", "MS", "MOS", "MSI",
    "MSCI", "NDAQ", "NTAP", "NFLX", "NEM", "NWSA", "NWS", "NEE", "NKE", "NI",
    "NDSN", "NSC", "NTRS", "NOC", "NCLH", "NRG", "NUE", "NVDA", "NVR", "NXPI",
    "ORLY", "OXY", "ODFL", "OMC", "ON", "OKE", "ORCL", "OTIS", "PCAR", "PKG",
    "PLTR", "PANW", "PARA", "PH", "PAYX", "PAYC", "PYPL", "PNR", "PEP", "PFE",
    "PCG", "PM", "PSX", "PNW", "PNC", "POOL", "PPG", "PPL", "PFG", "PG",
    "PGR", "PLD", "PRU", "PEG", "PTC", "PSA", "PHM", "PWR", "QCOM", "DGX",
    "RL", "RJF", "RTX", "O", "REG", "REGN", "RF", "RSG", "RMD", "RVTY",
    "ROK", "ROL", "ROP", "ROST", "RCL", "SPGI", "CRM", "SBAC", "SLB", "STX",
    "SRE", "NOW", "SHW", "SPG", "SWKS", "SJM", "SW", "SNA", "SOLV", "SO",
    "LUV", "SWK", "SBUX", "STT", "STLD", "STE", "SYK", "SMCI", "SYF", "SNPS",
    "SYY", "TMUS", "TROW", "TTWO", "TPR", "TRGP", "TGT", "TEL", "TDY", "TFX",
    "TER", "TSLA", "TXN", "TPL", "TXT", "TMO", "TJX", "TSCO", "TT", "TDG",
    "TRV", "TRMB", "TFC", "TYL", "TSN", "USB", "UBER", "UDR", "ULTA", "UNP",
    "UAL", "UPS", "URI", "UNH", "UHS", "VLO", "VTR", "VLTO", "VRSN", "VRSK",
    "VZ", "VRTX", "VTRS", "VICI", "V", "VST", "VMC", "WRB", "GWW", "WAB",
    "WBA", "WMT", "DIS", "WBD", "WM", "WAT", "WEC", "WFC", "WELL", "WST",
    "WDC", "WY", "WMB", "WTW", "WDAY", "WYNN", "XEL", "XYL", "YUM", "ZBRA",
    "ZBH", "ZTS",
]
# Module-wide Redis client; fetch_news() publishes article payloads through
# it on the "notifications" channel.
# NOTE(review): the address is a hard-coded private-network host — consider
# reading it from configuration or an environment variable.
redis_client = redis.from_url("redis://192.168.1.23:6379")
from selenium.webdriver.chrome.options import Options
def get_html(link):
    """Return the HTML of ``link`` as rendered by a headless Chrome browser.

    A new Chrome WebDriver is started for every call and is always shut
    down before the function returns or raises.

    Args:
        link: URL of the page to load.

    Returns:
        The driver's ``page_source`` after ``driver.get(link)`` has returned.

    Raises:
        Any exception raised by Selenium while starting the browser or
        loading the page is propagated to the caller.
    """
    options = Options()
    options.add_argument("--headless")  # Run without a visible window
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(link)
        return driver.page_source
    finally:
        # Quit even when the page load fails; otherwise every failed fetch
        # would leave an orphaned Chrome process behind.
        driver.quit()
def get_news_yahoo(poll_interval=3600, max_workers=5):
    """Poll Yahoo Finance news for every ticker and publish notable articles.

    Runs forever. Each cycle fetches the news page of every symbol in
    ``tickers`` concurrently, inspects the first article on the page, and
    publishes it to the Redis ``"notifications"`` channel when it has a
    title, description, URL and image and its description is classified as
    ``"positive"`` or ``"negative"``. The function then sleeps for
    ``poll_interval`` seconds and starts over.

    Args:
        poll_interval: Seconds to wait between cycles (default: one hour).
        max_workers: Number of tickers fetched concurrently; each worker
            starts its own browser through ``get_html``.

    Returns:
        Never returns.
    """

    def fetch_news(ticker):
        """Fetch and publish news articles for a single ticker."""
        print(f"Fetching news for {ticker}...")
        # Yahoo Finance writes share-class symbols with a hyphen (BRK-B),
        # whereas the ticker list uses a dot (BRK.B).
        yahoo_symbol = ticker.replace(".", "-")
        try:
            html_content = get_html(
                f"https://finance.yahoo.com/quote/{yahoo_symbol}/news/"
            )
            soup = BeautifulSoup(html_content, "html.parser")
            articles = soup.find_all("section", attrs={"role": "article"})
            for article in articles[:1]:  # Only get the first article
                info_element = article.find("a", class_="subtle-link")
                if not info_element:
                    continue

                # The description is the first <p> inside a link in the
                # content div; skip the article when either is missing.
                content_div = article.find("div", class_="content")
                paragraphs = content_div.select("a p") if content_div else []
                if not paragraphs:
                    continue

                title = info_element.get("title")
                description = paragraphs[0].text
                url = info_element.get("href")
                image_element = info_element.find("img")
                image_url = image_element.get("src") if image_element else None

                # Check the cheap fields first so the model only runs on
                # articles that could actually be published.
                if not (title and description and url and image_url):
                    continue

                sentiment = pipe(description)[0]["label"]
                print(sentiment)
                if sentiment in ("positive", "negative"):
                    article_data = {
                        "tickers": [ticker],
                        "title": title,
                        "description": description,
                        "url": url,
                        "image_url": image_url,
                    }
                    # Publish as soon as it is retrieved
                    print("published!")
                    redis_client.publish("notifications", json.dumps([article_data]))
        except Exception as e:
            # Worker boundary: one failing ticker must not stop the others.
            print(f"Error fetching news for {ticker}: {e}")

    while True:
        # Leaving the ``with`` block waits for every submitted fetch to
        # finish, so the sleep starts only after the whole cycle is done.
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            executor.map(fetch_news, tickers)
        sleep(poll_interval)
# Start polling as soon as this module is executed. get_news_yahoo() loops
# forever, so nothing placed after this line will run.
get_news_yahoo()