diff --git a/src/server.ts b/src/server.ts index e45af59..3bee672 100644 --- a/src/server.ts +++ b/src/server.ts @@ -7,6 +7,7 @@ import { Strings } from './strings'; const router = Router(); +/* Handler for status (Tweet) request */ const statusRequest = async ( request: Request, event: FetchEvent, @@ -16,11 +17,31 @@ const statusRequest = async ( const url = new URL(request.url); const userAgent = request.headers.get('User-Agent') || ''; + /* User Agent matching for embed generators, bots, crawlers, and other automated + tools. It's pretty all-encompassing. Note that Firefox/92 is in here because + Discord sometimes uses the following UA: + + Mozilla/5.0 (Macintosh; Intel Mac OS X 11.6; rv:92.0) Gecko/20100101 Firefox/92.0 + + I'm not sure why that specific one, it's pretty weird, but this edge case ensures + stuff keeps working. + + On the very rare off chance someone happens to be using specifically Firefox 92, + the http-equiv="refresh" meta tag will ensure an actual human is sent to the destination. */ const isBotUA = userAgent.match( /bot|facebook|embed|got|firefox\/92|curl|wget|go-http|yahoo|generator|whatsapp|preview|link|proxy|vkshare|images|analyzer|index|crawl|spider|python|cfnetwork/gi ) !== null; + /* Check if domain is a direct media domain (i.e. d.fxtwitter.com), + the tweet is prefixed with /dl/ or /dir/ (for TwitFix interop), or the + tweet ends in .mp4, .jpg, or .png + + Note that .png is not documented because images always redirect to a jpg, + but it will help someone who does it mistakenly on something like Discord + + Also note that all we're doing here is setting the direct flag. If someone + links a video and ends it with .jpg, it will still redirect to a .mp4! */ if ( url.pathname.match(/\/status(es)?\/\d{2,20}\.(mp4|png|jpg)/g) !== null || Constants.DIRECT_MEDIA_DOMAINS.includes(url.hostname) || @@ -31,6 +52,8 @@ const statusRequest = async ( flags.direct = true; } + /* The pxtwitter.com domain is deprecated and Tweets posted after deprecation + date will have a notice saying we've moved to fxtwitter.com! */ if ( Constants.DEPRECATED_DOMAIN_LIST.includes(url.hostname) && BigInt(id?.match(/\d{2,20}/g)?.[0] || 0) > Constants.DEPRECATED_DOMAIN_EPOCH @@ -39,6 +62,10 @@ const statusRequest = async ( flags.deprecated = true; } + /* Check if request is to api.fxtwitter.com, or the tweet is appended with .json + Note that unlike TwitFix, FixTweet will never generate embeds for .json, and + in fact we only support .json because it's what people using TwitFix API would + be used to. */ if ( url.pathname.match(/\/status(es)?\/\d{2,20}\.(json)/g) !== null || Constants.API_HOST_LIST.includes(url.hostname) @@ -47,11 +74,11 @@ const statusRequest = async ( flags.api = true; } + /* Direct media or API access bypasses bot check, returning same response regardless of UA */ if (isBotUA || flags.direct || flags.api) { console.log(`Matched bot UA ${userAgent}`); - let response: Response; - + /* This throws the necessary data to handleStatus (in status.ts) */ const statusResponse = await handleStatus( id?.match(/\d{2,20}/)?.[0] || '0', mediaNumber ? parseInt(mediaNumber) : undefined, @@ -59,39 +86,49 @@ const statusRequest = async ( flags, language ); - + + /* Complete responses are normally sent just by errors. Normal embeds send a `text` value. */ if (statusResponse.response) { console.log('handleStatus sent response'); - response = statusResponse.response; + return statusResponse.response; } else if (statusResponse.text) { - /* Fallback if a person browses to a direct media link with a Tweet without media */ + console.log('handleStatus sent embed'); + /* We're checking if the User Agent is a bot again specifically in case they requested + direct media (d.fxtwitter.com, .mp4/.jpg, etc) but the Tweet contains no media. + + Since we obviously have no media to give the user, we'll just redirect to the Tweet. + Embeds will return as usual to bots as if direct media was never specified. */ if (!isBotUA) { return Response.redirect(`${Constants.TWITTER_ROOT}/${handle}/status/${id}`, 302); } - console.log('handleStatus sent embed'); - response = new Response(statusResponse.text, { + /* Return the response containing embed information */ + return new Response(statusResponse.text, { headers: Constants.RESPONSE_HEADERS, status: 200 }); } else { - response = new Response(Strings.ERROR_UNKNOWN, { + /* Somehow handleStatus sent us nothing. This should *never* happen, but we have a case for it. */ + return new Response(Strings.ERROR_UNKNOWN, { headers: Constants.RESPONSE_HEADERS, status: 500 }); } - - return response; } else { + /* A human has clicked a fxtwitter.com/:screen_name/status/:id link! + Obviously we just need to redirect to the Tweet directly.*/ console.log('Matched human UA', userAgent); return Response.redirect(`${Constants.TWITTER_ROOT}/${handle}/status/${id?.match(/\d{2,20}/)?.[0]}`, 302); } }; +/* Redirects to user profile when linked. + We don't do any fancy special embeds yet, just Twitter default embeds. */ const profileRequest = async (request: Request) => { const { handle } = request.params; const url = new URL(request.url); + /* If not a valid screen name, we redirect to project GitHub */ if (handle.match(/\w{1,15}/gi)?.[0] !== handle) { return Response.redirect(Constants.REDIRECT_URL, 302); } else { @@ -99,6 +136,8 @@ const profileRequest = async (request: Request) => { } }; +/* TODO: is there any way to consolidate these stupid routes for itty-router? + I couldn't find documentation allowing for regex matching */ router.get('/:prefix?/:handle/status/:id', statusRequest); router.get('/:prefix?/:handle/status/:id/photo/:mediaNumber', statusRequest); router.get('/:prefix?/:handle/status/:id/photos/:mediaNumber', statusRequest); @@ -112,6 +151,9 @@ router.get('/:prefix?/:handle/statuses/:id/:language', statusRequest); router.get('/status/:id', statusRequest); router.get('/status/:id/:language', statusRequest); +/* Oembeds (used by Discord to enhance responses) + +Yes, I actually made the endpoint /owoembed. Deal with it. */ router.get('/owoembed', async (request: Request) => { console.log('oembed hit!'); const { searchParams } = new URL(request.url); @@ -126,6 +168,8 @@ router.get('/owoembed', async (request: Request) => { author_url: `${Constants.TWITTER_ROOT}/${encodeURIComponent( author )}/status/${encodeURIComponent(status)}`, + /* Change provider name if tweet is on deprecated domain. + TODO: Implement rotating messages/links */ provider_name: searchParams.get('deprecated') === 'true' ? Strings.DEPRECATED_DOMAIN_NOTICE_DISCORD @@ -135,6 +179,7 @@ router.get('/owoembed', async (request: Request) => { type: 'link', version: '1.0' }; + /* Stringify and send it on its way! */ return new Response(JSON.stringify(test), { headers: { ...Constants.RESPONSE_HEADERS, @@ -144,9 +189,14 @@ router.get('/owoembed', async (request: Request) => { }); }); +/* Pass through profile requests to Twitter. + We don't currently have custom profile cards yet, + but it's something we might do. Maybe. */ router.get('/:handle', profileRequest); router.get('/:handle/', profileRequest); +/* If we don't understand the route structure at all, we'll + redirect to GitHub (normal domains) or API docs (api.fxtwitter.com) */ router.get('*', async (request: Request) => { const url = new URL(request.url); @@ -156,6 +206,7 @@ router.get('*', async (request: Request) => { return Response.redirect(Constants.REDIRECT_URL, 302); }); +/* Wrapper to handle caching, and misc things like catching robots.txt */ export const cacheWrapper = async ( request: Request, event?: FetchEvent @@ -176,7 +227,8 @@ export const cacheWrapper = async ( const cacheKey = new Request(cacheUrl.toString(), request); const cache = caches.default; - /* Itty-router doesn't seem to like routing file names for some reason */ + /* Itty-router doesn't seem to like routing file names because whatever, + so we just handle it in the caching layer instead. Kinda hacky, but whatever. */ if (cacheUrl.pathname === '/robots.txt') { return new Response(Constants.ROBOTS_TXT, { headers: { @@ -187,6 +239,11 @@ export const cacheWrapper = async ( }); } + /* Some TwitFix APIs will never be available in FixTweet for privacy or + design choice reasons. + + Trying to access these APIs result in a message saying TwitFix API + has been sunset. */ if ( cacheUrl.pathname.startsWith('/api/') || cacheUrl.pathname.startsWith('/other/') || @@ -212,12 +269,13 @@ export const cacheWrapper = async ( console.log('Cache miss'); } + /* Literally do not know what the hell eslint is complaining about */ // eslint-disable-next-line no-case-declarations const response = await router.handle(request, event); - // Store the fetched response as cacheKey - // Use waitUntil so you can return the response without blocking on - // writing to cache + /* Store the fetched response as cacheKey + Use waitUntil so you can return the response without blocking on + writing to cache */ event && event.waitUntil(cache.put(cacheKey, response.clone())); return response; @@ -227,11 +285,13 @@ export const cacheWrapper = async ( console.log('Purging cache as requested'); await cache.delete(cacheKey); return new Response('', { status: 200 }); + /* yes, we dd give HEAD */ case 'HEAD': return new Response('', { headers: Constants.RESPONSE_HEADERS, status: 200 }); + /* We properly state our OPTIONS when asked */ case 'OPTIONS': return new Response('', { headers: { @@ -244,13 +304,16 @@ export const cacheWrapper = async ( } }; +/* Wrapper around Sentry, used for catching uncaught exceptions */ const sentryWrapper = async (event: FetchEvent, test = false): Promise => { let sentry: null | Toucan = null; if (typeof SENTRY_DSN !== 'undefined' && !test) { + /* We use Toucan for Sentry. Toucan is a Sentry SDK designed for Cloudflare Workers / DOs */ sentry = new Toucan({ dsn: SENTRY_DSN, - context: event, // Includes 'waitUntil', which is essential for Sentry logs to be delivered. Also includes 'request' -- no need to set it separately. + context: event, /* Includes 'waitUntil', which is essential for Sentry logs to be delivered. + Also includes 'request' -- no need to set it separately. */ allowedHeaders: /(.*)/, allowedSearchParams: /(.*)/, release: RELEASE_NAME, @@ -261,6 +324,8 @@ const sentryWrapper = async (event: FetchEvent, test = false): Promise => }); } + /* Responds with either a returned response (good!!!) or returns + a crash response (bad!!!) */ event.respondWith( (async (): Promise => { try { @@ -281,10 +346,7 @@ const sentryWrapper = async (event: FetchEvent, test = false): Promise => ); }; -/* May be undefined in test scenarios */ -if (typeof addEventListener !== 'undefined') { - /* Event to receive web requests on Cloudflare Worker */ - addEventListener('fetch', (event: FetchEvent) => { - sentryWrapper(event); - }); -} +/* Event to receive web requests on Cloudflare Worker */ +addEventListener('fetch', (event: FetchEvent) => { + sentryWrapper(event); +});