fxtwitter-docker/src/fetch.ts
2023-11-12 02:43:10 -05:00

309 lines
10 KiB
TypeScript

import { Context } from 'hono';
import { Constants } from './constants';
import { Experiment, experimentCheck } from './experiments';
import { generateUserAgent } from './helpers/useragent';
import { withTimeout } from './helpers/utils';
const API_ATTEMPTS = 3;
let wasElongatorDisabled = false;
const generateSnowflake = () => {
const epoch = 1288834974657n; /* Twitter snowflake epoch */
const timestamp = BigInt(Date.now()) - epoch;
return String((timestamp << 22n) | BigInt(Math.floor(Math.random() * 696969)));
};
export const twitterFetch = async (
c: Context,
url: string,
useElongator = experimentCheck(
Experiment.ELONGATOR_BY_DEFAULT,
typeof c.env?.TwitterProxy !== 'undefined'
),
validateFunction: (response: unknown) => boolean,
elongatorRequired = false
): Promise<unknown> => {
let apiAttempts = 0;
let newTokenGenerated = false;
const [userAgent, secChUa] = generateUserAgent();
console.log(`Outgoing useragent for this request:`, userAgent);
const tokenHeaders: { [header: string]: string } = {
'Authorization': Constants.GUEST_BEARER_TOKEN,
'User-Agent': userAgent,
'sec-ch-ua': secChUa,
...Constants.BASE_HEADERS
};
const guestTokenRequest = new Request(`${Constants.TWITTER_API_ROOT}/1.1/guest/activate.json`, {
method: 'POST',
headers: tokenHeaders,
cf: {
cacheEverything: true,
cacheTtl: Constants.GUEST_TOKEN_MAX_AGE
},
body: ''
});
/* A dummy version of the request only used for Cloudflare caching purposes.
The reason it exists at all is because Cloudflare won't cache POST requests. */
const guestTokenRequestCacheDummy = new Request(
`${Constants.TWITTER_API_ROOT}/1.1/guest/activate.json`,
{
method: 'GET',
cf: {
cacheEverything: true,
cacheTtl: Constants.GUEST_TOKEN_MAX_AGE
}
}
);
const cache = caches.default;
while (apiAttempts < API_ATTEMPTS) {
/* Generate a random CSRF token, Twitter just cares that header and cookie match,
REST can use shorter csrf tokens (32 bytes) but graphql prefers 160 bytes */
const csrfToken = crypto.randomUUID().replace(/-/g, '');
const headers: Record<string, string> = {
Authorization: Constants.GUEST_BEARER_TOKEN,
...Constants.BASE_HEADERS
};
apiAttempts++;
let activate: Response | null = null;
if (!newTokenGenerated && !useElongator) {
const timeBefore = performance.now();
const cachedResponse = await cache.match(guestTokenRequestCacheDummy.clone());
const timeAfter = performance.now();
console.log(`Searched cache for token, took ${timeAfter - timeBefore}ms`);
if (cachedResponse) {
console.log('Token cache hit');
activate = cachedResponse;
} else {
console.log('Token cache miss');
newTokenGenerated = true;
}
}
if (newTokenGenerated || (activate === null && !useElongator)) {
/* Let's get a guest token to call the API.
Back in the day (2022), this was pretty much unlimited and gave us nearly unlimited read-only access to Twitter.
Since the Elon buyout, this has become more stringent with rate limits, NSFW tweets not loading with this method,
among other seemingly arbitrary restrictions and quirks. */
const timeBefore = performance.now();
activate = await fetch(guestTokenRequest.clone());
const timeAfter = performance.now();
console.log(`Guest token request after ${timeAfter - timeBefore}ms`);
}
/* Let's grab that guest_token so we can use it */
let activateJson: { guest_token: string };
try {
activateJson = (await activate?.clone().json()) as { guest_token: string };
} catch (e: unknown) {
continue;
}
/* Elongator doesn't need guestToken, so we just make up a snowflake */
const guestToken = activateJson?.guest_token || generateSnowflake();
console.log(newTokenGenerated ? 'Activated guest:' : 'Using guest:', activateJson);
/* Just some cookies to mimick what the Twitter Web App would send */
headers['Cookie'] = [
`guest_id_ads=v1%3A${guestToken}`,
`guest_id_marketing=v1%3A${guestToken}`,
`guest_id=v1%3A${guestToken}`,
`ct0=${csrfToken};`
].join('; ');
headers['x-csrf-token'] = csrfToken;
headers['x-twitter-active-user'] = 'yes';
headers['x-guest-token'] = guestToken;
let response: unknown;
let apiRequest: Response | null = null;
try {
if (useElongator && typeof c.env?.TwitterProxy !== 'undefined') {
console.log('Fetching using elongator');
const performanceStart = performance.now();
apiRequest = await withTimeout(
(signal: AbortSignal) =>
c.env?.TwitterProxy.fetch(url, {
method: 'GET',
headers: headers,
signal: signal
})
);
const performanceEnd = performance.now();
console.log(`Elongator request successful after ${performanceEnd - performanceStart}ms`);
} else {
const performanceStart = performance.now();
apiRequest = await withTimeout((signal: AbortSignal) =>
fetch(url, {
method: 'GET',
headers: headers,
signal: signal
})
);
const performanceEnd = performance.now();
console.log(`Guest API request successful after ${performanceEnd - performanceStart}ms`);
}
response = await apiRequest?.json();
} catch (e: unknown) {
/* We'll usually only hit this if we get an invalid response from Twitter.
It's uncommon, but it happens */
console.error('Unknown error while fetching from API', e);
/* Elongator returns strings to communicate downstream errors */
if (String(e).indexOf('Status not found') !== -1) {
console.log('Tweet was not found');
return null;
}
try {
!useElongator &&
c.executionCtx &&
c.executionCtx.waitUntil(
cache.delete(guestTokenRequestCacheDummy.clone(), { ignoreMethod: true })
);
} catch (error) {
console.error((error as Error).stack);
}
if (useElongator) {
console.log('Elongator request failed, trying again without it');
wasElongatorDisabled = true;
}
newTokenGenerated = true;
useElongator = false;
continue;
}
if (
!wasElongatorDisabled &&
!useElongator &&
typeof c.env?.TwitterProxy !== 'undefined' &&
(response as TweetResultsByRestIdResult)?.data?.tweetResult?.result?.reason ===
'NsfwLoggedOut'
) {
console.log(`nsfw tweet detected, it's elongator time`);
useElongator = true;
continue;
}
const remainingRateLimit = parseInt(apiRequest?.headers.get('x-rate-limit-remaining') || '0');
console.log(`Remaining rate limit: ${remainingRateLimit} requests`);
/* Running out of requests within our rate limit, let's purge the cache */
if (!useElongator && remainingRateLimit < 10) {
console.log(`Purging token on this edge due to low rate limit remaining`);
try {
c.executionCtx &&
c.executionCtx.waitUntil(
cache.delete(guestTokenRequestCacheDummy.clone(), { ignoreMethod: true })
);
} catch (error) {
console.error((error as Error).stack);
}
}
if (!validateFunction(response)) {
console.log('Failed to fetch response, got', JSON.stringify(response));
if (elongatorRequired) {
console.log('Elongator was required, but we failed to fetch a valid response');
return {};
}
if (useElongator) {
console.log('Elongator request failed to validate, trying again without it');
wasElongatorDisabled = true;
}
useElongator = false;
newTokenGenerated = true;
continue;
}
try {
/* If we've generated a new token, we'll cache it */
if (c.executionCtx && newTokenGenerated && activate) {
const cachingResponse = new Response(await activate.clone().text(), {
headers: {
...tokenHeaders,
'cache-control': `max-age=${Constants.GUEST_TOKEN_MAX_AGE}`
}
});
console.log('Caching guest token');
c.executionCtx.waitUntil(cache.put(guestTokenRequestCacheDummy.clone(), cachingResponse));
}
} catch (error) {
console.error((error as Error).stack);
}
// @ts-expect-error - We'll pin the guest token to whatever response we have
response.guestToken = guestToken;
console.log('twitterFetch is all done here, see you soon!');
return response;
}
console.log('Twitter has repeatedly denied our requests, so we give up now');
return null;
};
export const fetchUser = async (
username: string,
c: Context,
useElongator = experimentCheck(
Experiment.ELONGATOR_PROFILE_API,
typeof c.env?.TwitterProxy !== 'undefined'
)
): Promise<GraphQLUserResponse> => {
return (await twitterFetch(
c,
`${
Constants.TWITTER_ROOT
}/i/api/graphql/sLVLhk0bGj3MVFEKTdax1w/UserByScreenName?variables=${encodeURIComponent(
JSON.stringify({
screen_name: username,
withSafetyModeUserFields: true
})
)}&features=${encodeURIComponent(
JSON.stringify({
blue_business_profile_image_shape_enabled: true,
responsive_web_graphql_exclude_directive_enabled: true,
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
responsive_web_graphql_timeline_navigation_enabled: false,
verified_phone_label_enabled: true
})
)}`,
useElongator,
// Validator function
(_res: unknown) => {
const response = _res as GraphQLUserResponse;
// If _res.data is an empty object, we have no user
if (!Object.keys(response?.data || {}).length) {
console.log(`response.data is empty, can't continue`);
return false;
}
return !(
response?.data?.user?.result?.__typename !== 'User' ||
typeof response?.data?.user?.result?.legacy === 'undefined'
);
/*
return !(
typeof conversation.globalObjects === 'undefined' &&
(typeof conversation.errors === 'undefined' ||
conversation.errors?.[0]?.code === 239)
);
*/
},
false
)) as GraphQLUserResponse;
};