From b321bece0e02fbcf2b5afd647e8ba811257a6d92 Mon Sep 17 00:00:00 2001 From: Wojciech Kwolek Date: Mon, 9 May 2022 09:30:39 +0200 Subject: [PATCH] initial --- Dockerfile | 8 ++ Pipfile | 17 ++++ Pipfile.lock | 250 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 7 ++ main.py | 67 +++++++++++++ requirements.txt | 32 ++++++ state.py | 59 +++++++++++ test.py | 15 +++ tweets.py | 26 +++++ 9 files changed, 481 insertions(+) create mode 100644 Dockerfile create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 README.md create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 state.py create mode 100644 test.py create mode 100644 tweets.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b501526 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3 + +COPY ./requirements.txt /requirements.txt +RUN pip install -r /requirements.txt + +COPY . /src +WORKDIR /src +CMD python3 main.py diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..763275c --- /dev/null +++ b/Pipfile @@ -0,0 +1,17 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +feedparser = "*" +python-telegram-bot = "*" +markdownify = "*" + +[dev-packages] + +[requires] +python_version = "3.9" + +[pipenv] +allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..2dbfa62 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,250 @@ +{ + "_meta": { + "hash": { + "sha256": "ffd43f327cdb9db9a8d4d4b4e95e96b742cb370536a736fdcd0c4a54f5c529f4" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.9" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "anyio": { + "hashes": [ + "sha256:a0aeffe2fb1fdf374a8e4b471444f0f3ac4fb9f5a5b542b48824475e0042a5a6", + "sha256:b5fa16c5ff93fa1046f2eeb5bbff2dad4d3514d6cda61d02816dba34fa8c3c2e" + ], + "markers": "python_full_version >= '3.6.2'", + "version": "==3.5.0" + }, + "apscheduler": { + "hashes": [ + "sha256:65e6574b6395498d371d045f2a8a7e4f7d50c6ad21ef7313d15b1c7cf20df1e3", + "sha256:ddc25a0ddd899de44d7f451f4375fb971887e65af51e41e5dcf681f59b8b2c9a" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==3.9.1" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30", + "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693" + ], + "markers": "python_version >= '3.6'", + "version": "==4.11.1" + }, + "cachetools": { + "hashes": [ + "sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6", + "sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4" + ], + "markers": "python_version ~= '3.7'", + "version": "==5.0.0" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3.5'", + "version": "==2.0.12" + }, + "feedparser": { + "hashes": [ + "sha256:1b7f57841d9cf85074deb316ed2c795091a238adb79846bc46dccdaf80f9c59a", + "sha256:5ce0410a05ab248c8c7cfca3a0ea2203968ee9ff4486067379af4827a59f9661" + ], + "index": "pypi", + "version": "==6.0.8" + }, + "h11": { + "hashes": [ + "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6", + "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042" + ], + "markers": "python_version >= '3.6'", + "version": "==0.12.0" + }, + "httpcore": { + "hashes": [ + "sha256:47d772f754359e56dd9d892d9593b6f9870a37aeb8ba51e9a88b09b3d68cfade", + "sha256:7503ec1c0f559066e7e39bc4003fd2ce023d01cf51793e3c173b864eb456ead1" + ], + "markers": "python_version >= '3.6'", + "version": "==0.14.7" + }, + "httpx": { + "hashes": [ + "sha256:d8e778f76d9bbd46af49e7f062467e3157a5a3d2ae4876a4bbfd8a51ed9c9cb4", + "sha256:e35e83d1d2b9b2a609ef367cc4c1e66fd80b750348b20cc9e19d1952fc2ca3f6" + ], + "markers": "python_version >= '3.6'", + "version": "==0.22.0" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3.5'", + "version": "==3.3" + }, + "markdownify": { + "hashes": [ + "sha256:d613f95b2649d8b83c4c4cb2e6c24cea8c7df4e07d418f92fa25ad0ddbb045e0", + "sha256:ef396bb8d0ffb3efacc08b86ab4aa6d36b234e782aea9f6b7f980798eaa64e33" + ], + "index": "pypi", + "version": "==0.11.2" + }, + "python-telegram-bot": { + "hashes": [ + "sha256:483b2b7d39508cb673b07814284d6c25706890c519d9aa8177becbadd969b4e2", + "sha256:a182a3d081071f1ea34833bc68ed7d0843c1fe0d6dca1d260a0e2d253b150f71" + ], + "index": "pypi", + "version": "==20.0a0" + }, + "pytz": { + "hashes": [ + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" + ], + "version": "==2022.1" + }, + "pytz-deprecation-shim": { + "hashes": [ + "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", + "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==0.1.0.post0" + }, + "rfc3986": { + "extras": [ + "idna2008" + ], + "hashes": [ + "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835", + "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97" + ], + "version": "==1.5.0" + }, + "setuptools": { + "hashes": [ + "sha256:26ead7d1f93efc0f8c804d9fafafbe4a44b179580a7105754b245155f9af05a8", + "sha256:47c7b0c0f8fc10eec4cf1e71c6fdadf8decaa74ffa087e68cd1c20db7ad6a592" + ], + "markers": "python_version >= '3.7'", + "version": "==62.1.0" + }, + "sgmllib3k": { + "hashes": [ + "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9" + ], + "version": "==1.0.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "sniffio": { + "hashes": [ + "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663", + "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de" + ], + "markers": "python_version >= '3.5'", + "version": "==1.2.0" + }, + "soupsieve": { + "hashes": [ + "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759", + "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d" + ], + "markers": "python_version >= '3.6'", + "version": "==2.3.2.post1" + }, + "tornado": { + "hashes": [ + "sha256:0a00ff4561e2929a2c37ce706cb8233b7907e0cdc22eab98888aca5dd3775feb", + "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c", + "sha256:1e8225a1070cd8eec59a996c43229fe8f95689cb16e552d130b9793cb570a288", + "sha256:20241b3cb4f425e971cb0a8e4ffc9b0a861530ae3c52f2b0434e6c1b57e9fd95", + "sha256:25ad220258349a12ae87ede08a7b04aca51237721f63b1808d39bdb4b2164558", + "sha256:33892118b165401f291070100d6d09359ca74addda679b60390b09f8ef325ffe", + "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791", + "sha256:3447475585bae2e77ecb832fc0300c3695516a47d46cefa0528181a34c5b9d3d", + "sha256:34ca2dac9e4d7afb0bed4677512e36a52f09caa6fded70b4e3e1c89dbd92c326", + "sha256:3e63498f680547ed24d2c71e6497f24bca791aca2fe116dbc2bd0ac7f191691b", + "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4", + "sha256:6196a5c39286cc37c024cd78834fb9345e464525d8991c21e908cc046d1cc02c", + "sha256:61b32d06ae8a036a6607805e6720ef00a3c98207038444ba7fd3d169cd998910", + "sha256:6286efab1ed6e74b7028327365cf7346b1d777d63ab30e21a0f4d5b275fc17d5", + "sha256:65d98939f1a2e74b58839f8c4dab3b6b3c1ce84972ae712be02845e65391ac7c", + "sha256:66324e4e1beede9ac79e60f88de548da58b1f8ab4b2f1354d8375774f997e6c0", + "sha256:6c77c9937962577a6a76917845d06af6ab9197702a42e1346d8ae2e76b5e3675", + "sha256:70dec29e8ac485dbf57481baee40781c63e381bebea080991893cd297742b8fd", + "sha256:7250a3fa399f08ec9cb3f7b1b987955d17e044f1ade821b32e5f435130250d7f", + "sha256:748290bf9112b581c525e6e6d3820621ff020ed95af6f17fedef416b27ed564c", + "sha256:7da13da6f985aab7f6f28debab00c67ff9cbacd588e8477034c0652ac141feea", + "sha256:8f959b26f2634a091bb42241c3ed8d3cedb506e7c27b8dd5c7b9f745318ddbb6", + "sha256:9de9e5188a782be6b1ce866e8a51bc76a0fbaa0e16613823fc38e4fc2556ad05", + "sha256:a48900ecea1cbb71b8c71c620dee15b62f85f7c14189bdeee54966fbd9a0c5bd", + "sha256:b87936fd2c317b6ee08a5741ea06b9d11a6074ef4cc42e031bc6403f82a32575", + "sha256:c77da1263aa361938476f04c4b6c8916001b90b2c2fdd92d8d535e1af48fba5a", + "sha256:cb5ec8eead331e3bb4ce8066cf06d2dfef1bfb1b2a73082dfe8a161301b76e37", + "sha256:cc0ee35043162abbf717b7df924597ade8e5395e7b66d18270116f8745ceb795", + "sha256:d14d30e7f46a0476efb0deb5b61343b1526f73ebb5ed84f23dc794bdb88f9d9f", + "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32", + "sha256:d3d20ea5782ba63ed13bc2b8c291a053c8d807a8fa927d941bd718468f7b950c", + "sha256:d3f7594930c423fd9f5d1a76bee85a2c36fd8b4b16921cae7e965f22575e9c01", + "sha256:dcef026f608f678c118779cd6591c8af6e9b4155c44e0d1bc0c87c036fb8c8c4", + "sha256:e0791ac58d91ac58f694d8d2957884df8e4e2f6687cdf367ef7eb7497f79eaa2", + "sha256:e385b637ac3acaae8022e7e47dfa7b83d3620e432e3ecb9a3f7f58f150e50921", + "sha256:e519d64089b0876c7b467274468709dadf11e41d65f63bba207e04217f47c085", + "sha256:e7229e60ac41a1202444497ddde70a48d33909e484f96eb0da9baf8dc68541df", + "sha256:ed3ad863b1b40cd1d4bd21e7498329ccaece75db5a5bf58cd3c9f130843e7102", + "sha256:f0ba29bafd8e7e22920567ce0d232c26d4d47c8b5cf4ed7b562b5db39fa199c5", + "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68", + "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5" + ], + "markers": "python_version >= '3.5'", + "version": "==6.1" + }, + "tzdata": { + "hashes": [ + "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9", + "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.1" + }, + "tzlocal": { + "hashes": [ + "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745", + "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7" + ], + "markers": "python_version >= '3.6'", + "version": "==4.2" + } + }, + "develop": {} +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..eb04083 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +config - env variables + +* `TELEGRAM_TOKEN` +* `TELEGRAM_CHANNEL` +* `TWITTER_USER` +* `DB_PATH` +* `INTERVAL` diff --git a/main.py b/main.py new file mode 100644 index 0000000..d90514b --- /dev/null +++ b/main.py @@ -0,0 +1,67 @@ +import os +from pprint import pprint + +from markdownify import MarkdownConverter +from telegram import Update +from telegram.constants import ParseMode +from telegram.ext import ApplicationBuilder, CallbackContext, CommandHandler +from telegram.helpers import escape_markdown + +from state import State +from tweets import get_tweets + + +async def start(update: Update, context: CallbackContext.DEFAULT_TYPE): + await context.bot.send_message(chat_id=update.effective_chat.id, text="I'm a bot, hi.") + + +class TgMarkdownConverter(MarkdownConverter): + def convert_a(self, el, text, convert_as_inline): + el['href'] = escape_markdown(el['href'], version=2) + return super().convert_a(el, text, convert_as_inline) + + def process_text(self, el): + text = super().process_text(el) + return escape_markdown(text, version=2) + +def markdownify(html, **options): + return TgMarkdownConverter(**options).convert(html) + + +#print(markdownify("c")) + + +async def check(context: CallbackContext): + tweets = get_tweets(os.environ['TWITTER_USER']) + state = State(os.environ['DB_PATH']) + for tweet in tweets: + if state.has(tweet): + return + preamble = f"""\ + {tweet.author}
+Permalink

+""" + + md = markdownify(preamble+tweet.content) + + try: + await context.bot.send_message( + chat_id=os.environ['TELEGRAM_CHANNEL'], + text=md, + parse_mode=ParseMode.MARKDOWN_V2, + disable_web_page_preview=True) + except Exception: + break + state.add(tweet) + +if __name__ == '__main__': + application = ApplicationBuilder().token( + os.environ['TELEGRAM_TOKEN']).build() + + start_handler = CommandHandler('start', start) + application.add_handler(start_handler) + + application.job_queue.run_repeating( + check, int(os.environ['INTERVAL']), first=1, chat_id=os.environ['TELEGRAM_CHANNEL']) + + application.run_polling() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c5c9469 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,32 @@ +# +# These requirements were autogenerated by pipenv +# To regenerate from the project's Pipfile, run: +# +# pipenv lock --requirements +# + +-i https://pypi.org/simple +anyio==3.5.0; python_full_version >= '3.6.2' +apscheduler==3.9.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4' +beautifulsoup4==4.11.1; python_version >= '3.6' +cachetools==5.0.0; python_version ~= '3.7' +certifi==2021.10.8 +charset-normalizer==2.0.12; python_version >= '3.5' +feedparser==6.0.8 +h11==0.12.0; python_version >= '3.6' +httpcore==0.14.7; python_version >= '3.6' +httpx==0.22.0; python_version >= '3.6' +idna==3.3; python_version >= '3.5' +markdownify==0.11.2 +python-telegram-bot==20.0a0 +pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' +pytz==2022.1 +rfc3986[idna2008]==1.5.0 +setuptools==62.1.0; python_version >= '3.7' +sgmllib3k==1.0.0 +six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +sniffio==1.2.0; python_version >= '3.5' +soupsieve==2.3.2.post1; python_version >= '3.6' +tornado==6.1; python_version >= '3.5' +tzdata==2022.1; python_version >= '3.6' +tzlocal==4.2; python_version >= '3.6' diff --git a/state.py b/state.py new file mode 100644 index 0000000..ca5e94e --- /dev/null +++ b/state.py @@ -0,0 +1,59 @@ +import os +import pathlib +import pickle +import sqlite3 +import tempfile + +from tweets import Tweet + +class State: + def __init__(self, db: str) -> None: + self._file = db + con = self._connect() + cur = con.cursor() + cur.execute('''CREATE TABLE IF NOT EXISTS seen_tweets ( + id INTEGER PRIMARY KEY, + tweet_id TEXT UNIQUE);''') + con.commit() + con.close() + + + def _connect(self): + return sqlite3.connect(self._file) + + def add(self, tweet: Tweet): + con = self._connect() + cur = con.cursor() + cur.execute("INSERT INTO seen_tweets(tweet_id) VALUES (?)", (tweet.id,)) + con.commit() + + def has(self, tweet: Tweet): + con = self._connect() + cur = con.cursor() + rows = cur.execute("SELECT * FROM seen_tweets WHERE tweet_id=?", (tweet.id,)) + return rows.fetchone() is not None + + +if __name__ == '__main__': + with tempfile.NamedTemporaryFile() as tmp: + state = State(tmp.name) + + print('Inserting t1, t2') + t1 = Tweet(id="test1", author="test1", content="test1") + t2 = Tweet(id="test2", author="test2", content="test2") + t3 = Tweet(id="test3", author="test3", content="test3") + state.add(t1) + state.add(t2) + + print('Checking if they exist in db') + print('t1:', state.has(t1)) + assert state.has(t1) + print('t2:', state.has(t2)) + assert state.has(t2) + print('t3:', state.has(t3)) + assert not state.has(t3) + + + + + diff --git a/test.py b/test.py new file mode 100644 index 0000000..8d6cf9f --- /dev/null +++ b/test.py @@ -0,0 +1,15 @@ +from pprint import pprint + +from state import State +from tweets import get_tweets + +piechocinski = get_tweets('piechocinski') +state = State("./test.db") + +print("tweets:") +for tweet in piechocinski: + if state.has(tweet): continue + pprint(tweet) + state.add(tweet) + + diff --git a/tweets.py b/tweets.py new file mode 100644 index 0000000..a5b6e5e --- /dev/null +++ b/tweets.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from pprint import pprint +from typing import List + +import feedparser + + +@dataclass +class Tweet: + id: str + author: str + content: str + + @classmethod + def from_rss_entry(cls, entry: feedparser.FeedParserDict): + return Tweet( + id=entry['id'], + author=entry['author'], + content=entry['summary'], + ) + + +def get_tweets(username: str) -> List[Tweet]: + newsfeed = feedparser.parse(f"https://nitter.net/{username}/rss") + return [Tweet.from_rss_entry(t) for t in newsfeed.entries] +