commit b321bece0e02fbcf2b5afd647e8ba811257a6d92 Author: Wojciech Kwolek Date: Mon May 9 09:30:39 2022 +0200 initial diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b501526 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3 + +COPY ./requirements.txt /requirements.txt +RUN pip install -r /requirements.txt + +COPY . /src +WORKDIR /src +CMD python3 main.py diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..763275c --- /dev/null +++ b/Pipfile @@ -0,0 +1,17 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +feedparser = "*" +python-telegram-bot = "*" +markdownify = "*" + +[dev-packages] + +[requires] +python_version = "3.9" + +[pipenv] +allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..2dbfa62 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,250 @@ +{ + "_meta": { + "hash": { + "sha256": "ffd43f327cdb9db9a8d4d4b4e95e96b742cb370536a736fdcd0c4a54f5c529f4" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.9" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "anyio": { + "hashes": [ + "sha256:a0aeffe2fb1fdf374a8e4b471444f0f3ac4fb9f5a5b542b48824475e0042a5a6", + "sha256:b5fa16c5ff93fa1046f2eeb5bbff2dad4d3514d6cda61d02816dba34fa8c3c2e" + ], + "markers": "python_full_version >= '3.6.2'", + "version": "==3.5.0" + }, + "apscheduler": { + "hashes": [ + "sha256:65e6574b6395498d371d045f2a8a7e4f7d50c6ad21ef7313d15b1c7cf20df1e3", + "sha256:ddc25a0ddd899de44d7f451f4375fb971887e65af51e41e5dcf681f59b8b2c9a" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==3.9.1" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30", + "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693" + ], + "markers": "python_version >= '3.6'", + "version": "==4.11.1" + }, + "cachetools": { + "hashes": [ + "sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6", + "sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4" + ], + "markers": "python_version ~= '3.7'", + "version": "==5.0.0" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3.5'", + "version": "==2.0.12" + }, + "feedparser": { + "hashes": [ + "sha256:1b7f57841d9cf85074deb316ed2c795091a238adb79846bc46dccdaf80f9c59a", + "sha256:5ce0410a05ab248c8c7cfca3a0ea2203968ee9ff4486067379af4827a59f9661" + ], + "index": "pypi", + "version": "==6.0.8" + }, + "h11": { + "hashes": [ + "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6", + "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042" + ], + "markers": "python_version >= '3.6'", + "version": "==0.12.0" + }, + "httpcore": { + "hashes": [ + "sha256:47d772f754359e56dd9d892d9593b6f9870a37aeb8ba51e9a88b09b3d68cfade", + "sha256:7503ec1c0f559066e7e39bc4003fd2ce023d01cf51793e3c173b864eb456ead1" + ], + "markers": "python_version >= '3.6'", + "version": "==0.14.7" + }, + "httpx": { + "hashes": [ + "sha256:d8e778f76d9bbd46af49e7f062467e3157a5a3d2ae4876a4bbfd8a51ed9c9cb4", + "sha256:e35e83d1d2b9b2a609ef367cc4c1e66fd80b750348b20cc9e19d1952fc2ca3f6" + ], + "markers": "python_version >= '3.6'", + "version": "==0.22.0" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3.5'", + "version": "==3.3" + }, + "markdownify": { + "hashes": [ + "sha256:d613f95b2649d8b83c4c4cb2e6c24cea8c7df4e07d418f92fa25ad0ddbb045e0", + "sha256:ef396bb8d0ffb3efacc08b86ab4aa6d36b234e782aea9f6b7f980798eaa64e33" + ], + "index": "pypi", + "version": "==0.11.2" + }, + "python-telegram-bot": { + "hashes": [ + "sha256:483b2b7d39508cb673b07814284d6c25706890c519d9aa8177becbadd969b4e2", + "sha256:a182a3d081071f1ea34833bc68ed7d0843c1fe0d6dca1d260a0e2d253b150f71" + ], + "index": "pypi", + "version": "==20.0a0" + }, + "pytz": { + "hashes": [ + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" + ], + "version": "==2022.1" + }, + "pytz-deprecation-shim": { + "hashes": [ + "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", + "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==0.1.0.post0" + }, + "rfc3986": { + "extras": [ + "idna2008" + ], + "hashes": [ + "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835", + "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97" + ], + "version": "==1.5.0" + }, + "setuptools": { + "hashes": [ + "sha256:26ead7d1f93efc0f8c804d9fafafbe4a44b179580a7105754b245155f9af05a8", + "sha256:47c7b0c0f8fc10eec4cf1e71c6fdadf8decaa74ffa087e68cd1c20db7ad6a592" + ], + "markers": "python_version >= '3.7'", + "version": "==62.1.0" + }, + "sgmllib3k": { + "hashes": [ + "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9" + ], + "version": "==1.0.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "sniffio": { + "hashes": [ + "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663", + "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de" + ], + "markers": "python_version >= '3.5'", + "version": "==1.2.0" + }, + "soupsieve": { + "hashes": [ + "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759", + "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d" + ], + "markers": "python_version >= '3.6'", + "version": "==2.3.2.post1" + }, + "tornado": { + "hashes": [ + "sha256:0a00ff4561e2929a2c37ce706cb8233b7907e0cdc22eab98888aca5dd3775feb", + "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c", + "sha256:1e8225a1070cd8eec59a996c43229fe8f95689cb16e552d130b9793cb570a288", + "sha256:20241b3cb4f425e971cb0a8e4ffc9b0a861530ae3c52f2b0434e6c1b57e9fd95", + "sha256:25ad220258349a12ae87ede08a7b04aca51237721f63b1808d39bdb4b2164558", + "sha256:33892118b165401f291070100d6d09359ca74addda679b60390b09f8ef325ffe", + "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791", + "sha256:3447475585bae2e77ecb832fc0300c3695516a47d46cefa0528181a34c5b9d3d", + "sha256:34ca2dac9e4d7afb0bed4677512e36a52f09caa6fded70b4e3e1c89dbd92c326", + "sha256:3e63498f680547ed24d2c71e6497f24bca791aca2fe116dbc2bd0ac7f191691b", + "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4", + "sha256:6196a5c39286cc37c024cd78834fb9345e464525d8991c21e908cc046d1cc02c", + "sha256:61b32d06ae8a036a6607805e6720ef00a3c98207038444ba7fd3d169cd998910", + "sha256:6286efab1ed6e74b7028327365cf7346b1d777d63ab30e21a0f4d5b275fc17d5", + "sha256:65d98939f1a2e74b58839f8c4dab3b6b3c1ce84972ae712be02845e65391ac7c", + "sha256:66324e4e1beede9ac79e60f88de548da58b1f8ab4b2f1354d8375774f997e6c0", + "sha256:6c77c9937962577a6a76917845d06af6ab9197702a42e1346d8ae2e76b5e3675", + "sha256:70dec29e8ac485dbf57481baee40781c63e381bebea080991893cd297742b8fd", + "sha256:7250a3fa399f08ec9cb3f7b1b987955d17e044f1ade821b32e5f435130250d7f", + "sha256:748290bf9112b581c525e6e6d3820621ff020ed95af6f17fedef416b27ed564c", + "sha256:7da13da6f985aab7f6f28debab00c67ff9cbacd588e8477034c0652ac141feea", + "sha256:8f959b26f2634a091bb42241c3ed8d3cedb506e7c27b8dd5c7b9f745318ddbb6", + "sha256:9de9e5188a782be6b1ce866e8a51bc76a0fbaa0e16613823fc38e4fc2556ad05", + "sha256:a48900ecea1cbb71b8c71c620dee15b62f85f7c14189bdeee54966fbd9a0c5bd", + "sha256:b87936fd2c317b6ee08a5741ea06b9d11a6074ef4cc42e031bc6403f82a32575", + "sha256:c77da1263aa361938476f04c4b6c8916001b90b2c2fdd92d8d535e1af48fba5a", + "sha256:cb5ec8eead331e3bb4ce8066cf06d2dfef1bfb1b2a73082dfe8a161301b76e37", + "sha256:cc0ee35043162abbf717b7df924597ade8e5395e7b66d18270116f8745ceb795", + "sha256:d14d30e7f46a0476efb0deb5b61343b1526f73ebb5ed84f23dc794bdb88f9d9f", + "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32", + "sha256:d3d20ea5782ba63ed13bc2b8c291a053c8d807a8fa927d941bd718468f7b950c", + "sha256:d3f7594930c423fd9f5d1a76bee85a2c36fd8b4b16921cae7e965f22575e9c01", + "sha256:dcef026f608f678c118779cd6591c8af6e9b4155c44e0d1bc0c87c036fb8c8c4", + "sha256:e0791ac58d91ac58f694d8d2957884df8e4e2f6687cdf367ef7eb7497f79eaa2", + "sha256:e385b637ac3acaae8022e7e47dfa7b83d3620e432e3ecb9a3f7f58f150e50921", + "sha256:e519d64089b0876c7b467274468709dadf11e41d65f63bba207e04217f47c085", + "sha256:e7229e60ac41a1202444497ddde70a48d33909e484f96eb0da9baf8dc68541df", + "sha256:ed3ad863b1b40cd1d4bd21e7498329ccaece75db5a5bf58cd3c9f130843e7102", + "sha256:f0ba29bafd8e7e22920567ce0d232c26d4d47c8b5cf4ed7b562b5db39fa199c5", + "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68", + "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5" + ], + "markers": "python_version >= '3.5'", + "version": "==6.1" + }, + "tzdata": { + "hashes": [ + "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9", + "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.1" + }, + "tzlocal": { + "hashes": [ + "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745", + "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7" + ], + "markers": "python_version >= '3.6'", + "version": "==4.2" + } + }, + "develop": {} +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..eb04083 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +config - env variables + +* `TELEGRAM_TOKEN` +* `TELEGRAM_CHANNEL` +* `TWITTER_USER` +* `DB_PATH` +* `INTERVAL` diff --git a/main.py b/main.py new file mode 100644 index 0000000..d90514b --- /dev/null +++ b/main.py @@ -0,0 +1,67 @@ +import os +from pprint import pprint + +from markdownify import MarkdownConverter +from telegram import Update +from telegram.constants import ParseMode +from telegram.ext import ApplicationBuilder, CallbackContext, CommandHandler +from telegram.helpers import escape_markdown + +from state import State +from tweets import get_tweets + + +async def start(update: Update, context: CallbackContext.DEFAULT_TYPE): + await context.bot.send_message(chat_id=update.effective_chat.id, text="I'm a bot, hi.") + + +class TgMarkdownConverter(MarkdownConverter): + def convert_a(self, el, text, convert_as_inline): + el['href'] = escape_markdown(el['href'], version=2) + return super().convert_a(el, text, convert_as_inline) + + def process_text(self, el): + text = super().process_text(el) + return escape_markdown(text, version=2) + +def markdownify(html, **options): + return TgMarkdownConverter(**options).convert(html) + + +#print(markdownify("c")) + + +async def check(context: CallbackContext): + tweets = get_tweets(os.environ['TWITTER_USER']) + state = State(os.environ['DB_PATH']) + for tweet in tweets: + if state.has(tweet): + return + preamble = f"""\ + {tweet.author}
+Permalink

+""" + + md = markdownify(preamble+tweet.content) + + try: + await context.bot.send_message( + chat_id=os.environ['TELEGRAM_CHANNEL'], + text=md, + parse_mode=ParseMode.MARKDOWN_V2, + disable_web_page_preview=True) + except Exception: + break + state.add(tweet) + +if __name__ == '__main__': + application = ApplicationBuilder().token( + os.environ['TELEGRAM_TOKEN']).build() + + start_handler = CommandHandler('start', start) + application.add_handler(start_handler) + + application.job_queue.run_repeating( + check, int(os.environ['INTERVAL']), first=1, chat_id=os.environ['TELEGRAM_CHANNEL']) + + application.run_polling() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c5c9469 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,32 @@ +# +# These requirements were autogenerated by pipenv +# To regenerate from the project's Pipfile, run: +# +# pipenv lock --requirements +# + +-i https://pypi.org/simple +anyio==3.5.0; python_full_version >= '3.6.2' +apscheduler==3.9.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4' +beautifulsoup4==4.11.1; python_version >= '3.6' +cachetools==5.0.0; python_version ~= '3.7' +certifi==2021.10.8 +charset-normalizer==2.0.12; python_version >= '3.5' +feedparser==6.0.8 +h11==0.12.0; python_version >= '3.6' +httpcore==0.14.7; python_version >= '3.6' +httpx==0.22.0; python_version >= '3.6' +idna==3.3; python_version >= '3.5' +markdownify==0.11.2 +python-telegram-bot==20.0a0 +pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' +pytz==2022.1 +rfc3986[idna2008]==1.5.0 +setuptools==62.1.0; python_version >= '3.7' +sgmllib3k==1.0.0 +six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +sniffio==1.2.0; python_version >= '3.5' +soupsieve==2.3.2.post1; python_version >= '3.6' +tornado==6.1; python_version >= '3.5' +tzdata==2022.1; python_version >= '3.6' +tzlocal==4.2; python_version >= '3.6' diff --git a/state.py b/state.py new file mode 100644 index 0000000..ca5e94e --- /dev/null +++ b/state.py @@ -0,0 +1,59 @@ +import os +import pathlib +import pickle +import sqlite3 +import tempfile + +from tweets import Tweet + +class State: + def __init__(self, db: str) -> None: + self._file = db + con = self._connect() + cur = con.cursor() + cur.execute('''CREATE TABLE IF NOT EXISTS seen_tweets ( + id INTEGER PRIMARY KEY, + tweet_id TEXT UNIQUE);''') + con.commit() + con.close() + + + def _connect(self): + return sqlite3.connect(self._file) + + def add(self, tweet: Tweet): + con = self._connect() + cur = con.cursor() + cur.execute("INSERT INTO seen_tweets(tweet_id) VALUES (?)", (tweet.id,)) + con.commit() + + def has(self, tweet: Tweet): + con = self._connect() + cur = con.cursor() + rows = cur.execute("SELECT * FROM seen_tweets WHERE tweet_id=?", (tweet.id,)) + return rows.fetchone() is not None + + +if __name__ == '__main__': + with tempfile.NamedTemporaryFile() as tmp: + state = State(tmp.name) + + print('Inserting t1, t2') + t1 = Tweet(id="test1", author="test1", content="test1") + t2 = Tweet(id="test2", author="test2", content="test2") + t3 = Tweet(id="test3", author="test3", content="test3") + state.add(t1) + state.add(t2) + + print('Checking if they exist in db') + print('t1:', state.has(t1)) + assert state.has(t1) + print('t2:', state.has(t2)) + assert state.has(t2) + print('t3:', state.has(t3)) + assert not state.has(t3) + + + + + diff --git a/test.py b/test.py new file mode 100644 index 0000000..8d6cf9f --- /dev/null +++ b/test.py @@ -0,0 +1,15 @@ +from pprint import pprint + +from state import State +from tweets import get_tweets + +piechocinski = get_tweets('piechocinski') +state = State("./test.db") + +print("tweets:") +for tweet in piechocinski: + if state.has(tweet): continue + pprint(tweet) + state.add(tweet) + + diff --git a/tweets.py b/tweets.py new file mode 100644 index 0000000..a5b6e5e --- /dev/null +++ b/tweets.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from pprint import pprint +from typing import List + +import feedparser + + +@dataclass +class Tweet: + id: str + author: str + content: str + + @classmethod + def from_rss_entry(cls, entry: feedparser.FeedParserDict): + return Tweet( + id=entry['id'], + author=entry['author'], + content=entry['summary'], + ) + + +def get_tweets(username: str) -> List[Tweet]: + newsfeed = feedparser.parse(f"https://nitter.net/{username}/rss") + return [Tweet.from_rss_entry(t) for t in newsfeed.entries] +