import React from 'react';
import Integration, { IntegrationFeature, IntegrationAlbum, IntegrationArtist, IntegrationTrack } from '../Integration';
import StoreLinkIcon from '../../../components/common/StoreLinkIcon';
import { IntegrationWith } from '../../../api/api';
import { runInNewContext } from 'vm';
import { TextRotateVertical } from '@material-ui/icons';

enum SearchType {
    Track = 'track',
    Artist = 'artist',
    Album = 'album',
}

/** Base URL used to build links to scraped tracks, albums and artists. */
const YOUTUBE_MUSIC_BASE_URL = 'https://music.youtube.com';

/** Returns a printable message for whatever value ended up in a `catch` clause. */
function describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
}

/** True for non-null objects, i.e. values on which the `in` operator is safe. */
function isObject(value: any): boolean {
    return typeof value === 'object' && value !== null;
}

/**
 * Decodes `\xNN` escape sequences (two hex digits) into the corresponding
 * characters and then runs the result through `unescape`.
 *
 * NOTE(review): `unescape` is deprecated and also decodes `%NN` sequences in the
 * payload. It is kept because the original decoding relied on it; confirm
 * whether percent-decoding is actually wanted here.
 */
function decodeHexEscapes(s: string): string {
    const withoutHexEscapes = s.replace(
        /\\x([0-9a-f]{2})/gi,
        (_match: string, hex: string) => String.fromCharCode(parseInt(hex, 16)),
    );
    return unescape(withoutHexEscapes);
}

/**
 * Handles the 1st page variant: `params` is an inline object literal containing
 * a "query" key and `data` is a quoted JSON string with backslash-escaped quotes.
 *
 * @returns The parsed data, or undefined if the block is absent or unparseable.
 */
function extractPlainInitialData(text: string): any | undefined {
    const pattern = /initialData\.push\({[\n\r\s]*path:.*[\n\r\s]+params:\s*{\s*['"]query['"].*[\n\r\s]+data:\s*['"](.*)['"]\s*[\n\r]/;
    const m = text.match(pattern);
    if (!m || !m[1]) {
        return undefined;
    }
    // Undo one level of string escaping. Order matters: \" becomes " first, which
    // turns an original \\\" into \\" - the second pass then reduces that to \".
    const cleaned = m[1].replace(/\\"/g, '"').replace(/\\\\"/g, '\\"');
    try {
        return JSON.parse(cleaned);
    } catch (e) {
        // A parse failure here must not prevent the other variant from being tried.
        console.log("Error parsing initial data (plain variant):", describeError(e));
        return undefined;
    }
}

/**
 * Handles the 2nd page variant: `params` is wrapped in `JSON.parse('...')` and
 * both `params` and `data` are single-quoted strings using `\xNN` escapes.
 * Only blocks whose params contain a "query" key are used; if several such
 * blocks exist, the last one that parses wins.
 *
 * @returns The parsed data, or undefined if no usable block was found.
 */
function extractEncodedInitialData(text: string): any | undefined {
    const pattern = /params:[\s]*JSON\.parse\('([^']*)'\),[\n\r\s]*data:[\s]*'([^']*)'/g;
    let result: any = undefined;
    let m: RegExpExecArray | null;
    while ((m = pattern.exec(text)) !== null) {
        try {
            const params = JSON.parse(decodeHexEscapes(m[1]));
            if (!isObject(params) || !('query' in params)) {
                continue;
            }
            result = JSON.parse(decodeHexEscapes(m[2]));
        } catch (e) {
            // Skip this block only; later blocks may still be usable.
            console.log("Error parsing initial data (encoded variant):", describeError(e));
        }
    }
    return result;
}

/**
 * Extracts the search-result payload that the page pushes onto `initialData`.
 *
 * At the time of writing, the scraper tries to capture the THIS part of:
 *
 *   initialData.push({
 *     path: ...,
 *     params: {"query":"something"},
 *     data: "THIS",
 *   });
 *
 * Another variant was found in the field, with additional encoding involved:
 *
 *   initialData.push({
 *     path: '\/search',
 *     params: JSON.parse('\x7b\x22query\x22:\x22something\x22\x7d')
 *     data: 'THIS2'
 *   })
 *
 * where THIS2 is a string that also contains escapes like \x7b and \x22.
 *
 * @param text Raw text of the search page.
 * @returns The parsed payload of whichever variant worked (the 1st variant is
 *          preferred), or undefined if neither could be extracted.
 */
export function extractInitialData(text: string): any | undefined {
    const plain = extractPlainInitialData(text);
    const encoded = extractEncodedInitialData(text);

    // Return either one that worked.
    const result = plain || encoded;
    console.log("initial data:", result);
    return result;
}

/**
 * Returns the `text.runs` array of one flex column, or an empty array when the
 * column does not have the expected structure.
 */
function flexColumnRuns(column: any): any[] {
    const renderer = column && column.musicResponsiveListItemFlexColumnRenderer;
    const runs = renderer && renderer.text && renderer.text.runs;
    return Array.isArray(runs) ? runs : [];
}

/**
 * Returns the text of the first run of a list item's second flex column, which
 * the parsers compare against labels such as "Song", "Artist" or "Album".
 */
function itemTypeLabel(item: any): string | undefined {
    if (!item || !Array.isArray(item.flexColumns)) {
        return undefined;
    }
    const firstRun = flexColumnRuns(item.flexColumns[1])[0];
    return firstRun ? firstRun.text : undefined;
}

/**
 * Collects every `musicResponsiveListItemRenderer` found in the music shelves of
 * the initial data whose type label is one of `typeLabels`.
 *
 * Throws if the top-level structure of `initialData` is not as expected (callers
 * catch this); individual malformed shelves or items are simply skipped.
 */
function collectListItems(initialData: any, typeLabels: string[]): any[] {
    const items: any[] = [];
    for (const section of initialData.contents.sectionListRenderer.contents) {
        const shelf = section && section.musicShelfRenderer;
        if (!shelf || !Array.isArray(shelf.contents)) {
            continue;
        }
        for (const entry of shelf.contents) {
            const item = entry && entry.musicResponsiveListItemRenderer;
            const label = itemTypeLabel(item);
            if (label !== undefined && typeLabels.includes(label)) {
                items.push(item);
            }
        }
    }
    return items;
}

/** Returns the music page type a navigation endpoint browses to, if it has one. */
function browsePageType(endpoint: any): string | undefined {
    const browse = endpoint.browseEndpoint;
    const configs = browse && browse.browseEndpointContextSupportedConfigs;
    const musicConfig = configs && configs.browseEndpointContextMusicConfig;
    return musicConfig ? musicConfig.pageType : undefined;
}

/**
 * Builds a track from one "Song" list item by inspecting the navigation
 * endpoints of all text runs in all of its columns:
 *  - a watch endpoint with a video id provides the title and the track URL;
 *  - a browse endpoint of page type MUSIC_PAGE_TYPE_ALBUM provides the album;
 *  - a browse endpoint of page type MUSIC_PAGE_TYPE_ARTIST provides the artist.
 * If several runs qualify for the same role, the last one wins.
 */
function parseTrackItem(item: any): IntegrationTrack {
    // Gather all 'runs' fields together from all columns.
    const runs: any[] = [];
    for (const column of item.flexColumns) {
        runs.push(...flexColumnRuns(column));
    }

    let title: string | undefined = undefined;
    let videoId: string | undefined = undefined;
    let album: IntegrationAlbum = {};
    let artist: IntegrationArtist = {};

    for (const run of runs) {
        const endpoint = isObject(run) ? run.navigationEndpoint : undefined;
        if (!isObject(endpoint)) {
            continue;
        }

        const watch = endpoint.watchEndpoint;
        if (isObject(watch) && 'videoId' in watch) {
            videoId = watch.videoId;
            title = run.text;
            continue;
        }

        const pageType = browsePageType(endpoint);
        if (pageType === 'MUSIC_PAGE_TYPE_ALBUM') {
            album = {
                url: `${YOUTUBE_MUSIC_BASE_URL}/browse/${endpoint.browseEndpoint.browseId}`,
                name: run.text,
            };
        } else if (pageType === 'MUSIC_PAGE_TYPE_ARTIST') {
            artist = {
                url: `${YOUTUBE_MUSIC_BASE_URL}/browse/${endpoint.browseEndpoint.browseId}`,
                name: run.text,
            };
        }
    }

    if (album.name && artist.name) {
        album.artist = artist;
    }

    return {
        title: title,
        // Without a video id there is nothing to link to; do not emit "watch?v=undefined".
        url: videoId ? `${YOUTUBE_MUSIC_BASE_URL}/watch?v=${videoId}` : undefined,
        artist: artist,
        album: album,
    };
}

/**
 * Builds `{ name, url }` entries from list items that link to a browse page
 * (artists and albums). The name is the first run of the first column; the URL
 * is derived from the item's browse id. Items without a browse id are skipped.
 */
function parseBrowsableItems(items: any[]): { name?: string, url?: string }[] {
    const results: { name?: string, url?: string }[] = [];
    for (const item of items) {
        const endpoint = item.navigationEndpoint;
        const browse = endpoint && endpoint.browseEndpoint;
        const browseId = browse && browse.browseId;
        if (!browseId) {
            continue;
        }
        const nameRun = flexColumnRuns(item.flexColumns[0])[0];
        results.push({
            name: nameRun ? nameRun.text : undefined,
            url: `${YOUTUBE_MUSIC_BASE_URL}/browse/${browseId}`,
        });
    }
    return results;
}

/**
 * Parses all "Song"-type items out of extracted initial data.
 *
 * @param initialData Result of `extractInitialData` (may be undefined).
 * @returns The tracks found; an empty array if the data has an unexpected shape.
 */
export function parseTracks(initialData: any): IntegrationTrack[] {
    try {
        // Scrape for any "Song"-type items.
        const items = collectListItems(initialData, ["Song"]);
        console.log("Found song itemrenderers:", items);
        return items.map((item: any) => parseTrackItem(item));
    } catch (e) {
        console.log("Error parsing tracks:", describeError(e));
        return [];
    }
}

/**
 * Parses all "Artist"-type items out of extracted initial data.
 *
 * @param initialData Result of `extractInitialData` (may be undefined).
 * @returns The artists found; an empty array if the data has an unexpected shape.
 */
export function parseArtists(initialData: any): IntegrationArtist[] {
    try {
        // Scrape for any "Artist"-type items.
        return parseBrowsableItems(collectListItems(initialData, ["Artist"]));
    } catch (e) {
        console.log("Error parsing artists:", describeError(e));
        return [];
    }
}

/**
 * Parses all "Album"- and "Single"-type items out of extracted initial data.
 *
 * @param initialData Result of `extractInitialData` (may be undefined).
 * @returns The albums found; an empty array if the data has an unexpected shape.
 */
export function parseAlbums(initialData: any): IntegrationAlbum[] {
    try {
        // Scrape for any "Album"- or "Single"-type items.
        return parseBrowsableItems(collectListItems(initialData, ["Album", "Single"]));
    } catch (e) {
        console.log("Error parsing albums:", describeError(e));
        return [];
    }
}

/**
 * Fetches the search page for `query` through the backend's search endpoint of
 * the given integration and returns the response body as text.
 */
async function fetchSearchPage(integrationId: number, query: string): Promise<string> {
    const response = await fetch(
        (process.env.REACT_APP_BACKEND || "") +
        `/integrations/${integrationId}/search?q=${encodeURIComponent(query)}`);
    return response.text();
}

/**
 * Integration that searches YouTube Music by scraping the search page returned
 * by the backend's `/integrations/<id>/search` endpoint.
 */
export default class YoutubeMusicWebScraper extends Integration {
    integrationId: number;

    constructor(integrationId: number) {
        super(integrationId);
        this.integrationId = integrationId;
    }

    getFeatures(): IntegrationFeature[] {
        return [
            IntegrationFeature.Test,
            IntegrationFeature.SearchTrack,
            IntegrationFeature.SearchAlbum,
            IntegrationFeature.SearchArtist,
        ];
    }

    getIcon(props: any) {
        // NOTE(review): this currently returns undefined although StoreLinkIcon is
        // imported and otherwise unused in this file - presumably an icon element
        // is meant to be rendered here; confirm and restore.
        return;
    }

    providesStoreLink() {
        return IntegrationWith.YoutubeMusic;
    }

    /**
     * Runs a known search and checks that the expected song comes out first.
     *
     * @throws Error if the first scraped track is not titled "No One Knows".
     */
    async test(testParams: {}) {
        const text = await fetchSearchPage(this.integrationId, 'No One Knows Queens Of The Stone Age');
        const songs = parseTracks(extractInitialData(text));
        if (!Array.isArray(songs) || songs.length === 0 || songs[0].title !== "No One Knows") {
            throw new Error("Test failed; No One Knows was not correctly identified.");
        }
    }

    // NOTE(review): `limit` is accepted by the search methods below but is not
    // applied to the results; confirm the intended contract with the callers.

    /** Searches for tracks matching `query`. */
    async searchTrack(query: string, limit: number): Promise<IntegrationTrack[]> {
        const text = await fetchSearchPage(this.integrationId, query);
        return parseTracks(extractInitialData(text));
    }

    /** Searches for albums (and singles) matching `query`. */
    async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> {
        const text = await fetchSearchPage(this.integrationId, query);
        return parseAlbums(extractInitialData(text));
    }

    /** Searches for artists matching `query`. */
    async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> {
        const text = await fetchSearchPage(this.integrationId, query);
        return parseArtists(extractInitialData(text));
    }
}