You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
289 lines
12 KiB
289 lines
12 KiB
import React from 'react'; |
|
import Integration, { IntegrationFeature, IntegrationAlbum, IntegrationArtist, IntegrationTrack } from '../Integration'; |
|
import StoreLinkIcon from '../../../components/common/StoreLinkIcon'; |
|
import { IntegrationWith } from '../../../api/api'; |
|
import { runInNewContext } from 'vm'; |
|
import { TextRotateVertical } from '@material-ui/icons'; |
|
|
|
/**
 * Search categories, identified by lowercase string values.
 *
 * NOTE(review): nothing in the visible part of this file references this enum;
 * confirm whether it is still needed.
 */
enum SearchType {
    Track = 'track',
    Artist = 'artist',
    Album = 'album',
}
|
|
|
/**
 * Extracts the JSON payload embedded in an `initialData.push({...})` script
 * block of a search result page.
 *
 * Two page variants are supported:
 *
 * 1. Plain:
 *
 *        initialData.push({
 *            path: ...,
 *            params: {"query":"something"},
 *            data: "THIS",
 *        });
 *
 *    where THIS is a JSON document with its double quotes backslash-escaped.
 *
 * 2. Hex-encoded:
 *
 *        initialData.push({
 *            path: '\/search',
 *            params: JSON.parse('\x7b\x22query\x22:\x22something\x22\x7d'),
 *            data: 'THIS2'
 *        })
 *
 *    where both the params and THIS2 contain escapes such as \x7b and \x22.
 *
 * A variant that is absent or fails to parse is ignored, so the other variant
 * still gets its chance.
 *
 * @param text Raw page source to scan.
 * @returns The parsed payload of whichever variant worked (variant 1 wins when
 *          both do), or `undefined` when neither could be extracted.
 */
export function extractInitialData(text: string): any | undefined {
    // JSON.parse that reports failure as `undefined` instead of throwing, so a
    // broken payload in one variant cannot abort the whole extraction.
    const tryParse = (raw: string): any | undefined => {
        try {
            return JSON.parse(raw);
        } catch (e) {
            return undefined;
        }
    };

    // Variant 1. The closing quote of the data line may be followed by an
    // optional trailing comma before the line break.
    const plainPattern = /initialData\.push\({[\n\r\s]*path:.*[\n\r\s]+params:\s*{\s*['"]query['"].*[\n\r\s]+data:\s*['"](.*)['"]\s*,?\s*[\n\r]/;
    const plainMatch = text.match(plainPattern);
    let json1: any = undefined;
    if (plainMatch && plainMatch[1]) {
        // Undo the string-literal escaping of quotes before parsing.
        const cleaned = plainMatch[1].replace(/\\"/g, '"').replace(/\\\\"/g, '\\"');
        json1 = tryParse(cleaned);
    }

    // Variant 2. Only real hex digits form a \xNN escape.
    const hexEscape = /\\x([0-9a-f]{2})/gi;
    const decode = (s: string): string => {
        const unhexed = s.replace(hexEscape, (_whole: string, hex: string) =>
            String.fromCharCode(parseInt(hex, 16)));
        // NOTE(review): `unescape` is deprecated and also decodes %XX sequences;
        // it is kept to preserve the existing decoding - confirm it is still wanted.
        return unescape(unhexed);
    };

    const encodedBlocks = text.match(/params:[\s]*JSON\.parse\('([^']*)'\),[\n\r\s]*data:[\s]*'([^']*)'/g);
    let json2: any = undefined;
    for (const block of encodedBlocks || []) {
        const paramsMatch = block.match(/params:[\s]*JSON\.parse\('([^']*)'/);
        const dataMatch = block.match(/data:[\s]*'([^']*)'/);
        if (!paramsMatch || !dataMatch) {
            continue;
        }

        // Only blocks whose params carry a 'query' key hold search results.
        const params = tryParse(decode(paramsMatch[1]));
        if (params === null || typeof params !== 'object' || !('query' in params)) {
            continue;
        }

        // The last block that parses wins; an unparsable block keeps the
        // previously found payload.
        const parsed = tryParse(decode(dataMatch[1]));
        if (parsed !== undefined) {
            json2 = parsed;
        }
    }

    // Return either one that worked.
    const result = json1 || json2;
    console.log("initial data:", result);
    return result;
}
|
|
|
/**
 * Collects the "Song" entries from extracted search data and converts them
 * into tracks.
 *
 * Every `musicShelfRenderer` section is scanned for list items whose second
 * flex column starts with the text "Song". Title, video id, artist and album
 * are then read from the navigation endpoints of the item's text runs.
 * Items that lack the expected structure are skipped individually.
 *
 * @param initialData Payload as returned by the initial-data extraction.
 * @returns One track per recognised song item; an empty array when the overall
 *          structure cannot be read. `url` is left undefined for an item
 *          without a video id.
 */
export function parseTracks(initialData: any): IntegrationTrack[] {
    // The `runs` array of a flex column, or an empty array for a malformed column.
    const columnRuns = (column: any): any[] => {
        const renderer = column && column.musicResponsiveListItemFlexColumnRenderer;
        const runs = renderer && renderer.text && renderer.text.runs;
        return Array.isArray(runs) ? runs : [];
    };

    // The page type a run's browse endpoint refers to, if it has one.
    const browsePageType = (run: any): string | undefined => {
        const browse = run.navigationEndpoint && run.navigationEndpoint.browseEndpoint;
        const configs = browse && browse.browseEndpointContextSupportedConfigs;
        const musicConfig = configs && configs.browseEndpointContextMusicConfig;
        return musicConfig ? musicConfig.pageType : undefined;
    };

    try {
        const songRenderers: any[] = [];

        // Scrape for any "Song"-type items.
        initialData.contents.sectionListRenderer.contents.forEach((section: any) => {
            const shelf = section && section.musicShelfRenderer;
            if (!shelf || !Array.isArray(shelf.contents)) {
                return;
            }
            shelf.contents.forEach((item: any) => {
                const renderer = item && item.musicResponsiveListItemRenderer;
                if (!renderer || !Array.isArray(renderer.flexColumns)) {
                    return;
                }
                const typeRun = columnRuns(renderer.flexColumns[1])[0];
                if (typeRun && typeRun.text === "Song") {
                    songRenderers.push(renderer);
                }
            });
        });

        console.log("Found song itemrenderers:", songRenderers);

        return songRenderers.map((renderer: any) => {
            // Gather all 'runs' fields together from all columns.
            const runs: any[] = [];
            renderer.flexColumns.forEach((column: any) => {
                runs.push(...columnRuns(column));
            });

            // Find the runs that hold the title, artist or album.
            let title: string | undefined = undefined;
            let videoId: string | undefined = undefined;
            let album: IntegrationAlbum = {};
            let artist: IntegrationArtist = {};

            for (const run of runs) {
                if (!run || !run.navigationEndpoint) {
                    continue;
                }
                const watch = run.navigationEndpoint.watchEndpoint;
                if (watch && watch.videoId !== undefined) {
                    // The run that links to the video carries the title.
                    videoId = watch.videoId;
                    title = run.text;
                    continue;
                }
                const pageType = browsePageType(run);
                if (pageType === 'MUSIC_PAGE_TYPE_ALBUM') {
                    album = {
                        url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
                        name: run.text,
                    };
                } else if (pageType === 'MUSIC_PAGE_TYPE_ARTIST') {
                    artist = {
                        url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
                        name: run.text,
                    };
                }
            }

            if (album.name && artist.name) {
                album.artist = artist;
            }

            return {
                title: title,
                // Without a video id there is no valid watch link to offer.
                url: videoId ? `https://music.youtube.com/watch?v=${videoId}` : undefined,
                artist: artist,
                album: album,
            };
        });
    } catch (e) {
        console.log("Error parsing tracks:", e instanceof Error ? e.message : String(e));
        return [];
    }
}
|
|
|
/**
 * Collects the "Artist" entries from extracted search data.
 *
 * Every `musicShelfRenderer` section is scanned for list items whose second
 * flex column starts with the text "Artist". The artist name is the first run
 * of the first column and the link is built from the item's browse id.
 * Items without a name or browse id are skipped individually.
 *
 * @param initialData Payload as returned by the initial-data extraction.
 * @returns One artist per recognised item; an empty array when the overall
 *          structure cannot be read.
 */
export function parseArtists(initialData: any): IntegrationArtist[] {
    // Text of the first run of a flex column, or undefined for a malformed column.
    const firstRunText = (column: any): string | undefined => {
        const renderer = column && column.musicResponsiveListItemFlexColumnRenderer;
        const runs = renderer && renderer.text && renderer.text.runs;
        return Array.isArray(runs) && runs[0] ? runs[0].text : undefined;
    };

    try {
        const artists: IntegrationArtist[] = [];

        // Scrape for any "Artist"-type items.
        initialData.contents.sectionListRenderer.contents.forEach((section: any) => {
            const shelf = section && section.musicShelfRenderer;
            if (!shelf || !Array.isArray(shelf.contents)) {
                return;
            }
            shelf.contents.forEach((item: any) => {
                const renderer = item && item.musicResponsiveListItemRenderer;
                if (!renderer || !Array.isArray(renderer.flexColumns) ||
                    firstRunText(renderer.flexColumns[1]) !== "Artist") {
                    return;
                }

                const browse = renderer.navigationEndpoint && renderer.navigationEndpoint.browseEndpoint;
                const browseId = browse && browse.browseId;
                const name = firstRunText(renderer.flexColumns[0]);
                if (!browseId || name === undefined) {
                    // Not enough information to describe or link the artist.
                    return;
                }

                artists.push({
                    name: name,
                    url: `https://music.youtube.com/browse/${browseId}`,
                });
            });
        });

        return artists;
    } catch (e) {
        console.log("Error parsing artists:", e instanceof Error ? e.message : String(e));
        return [];
    }
}
|
|
|
/**
 * Collects the "Album" and "Single" entries from extracted search data.
 *
 * Every `musicShelfRenderer` section is scanned for list items whose second
 * flex column starts with the text "Album" or "Single". The album name is the
 * first run of the first column and the link is built from the item's browse
 * id. Items without a name or browse id are skipped individually.
 *
 * @param initialData Payload as returned by the initial-data extraction.
 * @returns One album per recognised item; an empty array when the overall
 *          structure cannot be read.
 */
export function parseAlbums(initialData: any): IntegrationAlbum[] {
    // Text of the first run of a flex column, or undefined for a malformed column.
    const firstRunText = (column: any): string | undefined => {
        const renderer = column && column.musicResponsiveListItemFlexColumnRenderer;
        const runs = renderer && renderer.text && renderer.text.runs;
        return Array.isArray(runs) && runs[0] ? runs[0].text : undefined;
    };

    try {
        const albums: IntegrationAlbum[] = [];

        // Scrape for any "Album"- or "Single"-type items.
        initialData.contents.sectionListRenderer.contents.forEach((section: any) => {
            const shelf = section && section.musicShelfRenderer;
            if (!shelf || !Array.isArray(shelf.contents)) {
                return;
            }
            shelf.contents.forEach((item: any) => {
                const renderer = item && item.musicResponsiveListItemRenderer;
                if (!renderer || !Array.isArray(renderer.flexColumns)) {
                    return;
                }
                const kind = firstRunText(renderer.flexColumns[1]);
                if (kind !== "Album" && kind !== "Single") {
                    return;
                }

                const browse = renderer.navigationEndpoint && renderer.navigationEndpoint.browseEndpoint;
                const browseId = browse && browse.browseId;
                const name = firstRunText(renderer.flexColumns[0]);
                if (!browseId || name === undefined) {
                    // Not enough information to describe or link the album.
                    return;
                }

                albums.push({
                    name: name,
                    url: `https://music.youtube.com/browse/${browseId}`,
                });
            });
        });

        return albums;
    } catch (e) {
        console.log("Error parsing albums:", e instanceof Error ? e.message : String(e));
        return [];
    }
}
|
|
|
/**
 * Runs a search through the backend's integration search endpoint and returns
 * the initial-data payload extracted from the response body.
 */
async function fetchSearchInitialData(integrationId: number, query: string): Promise<any | undefined> {
    const response = await fetch(
        (process.env.REACT_APP_BACKEND || "") +
        `/integrations/${integrationId}/search?q=${encodeURIComponent(query)}`);
    const text = await response.text();
    return extractInitialData(text);
}

/**
 * Truncates a result list to at most `limit` entries. A missing or
 * non-positive limit leaves the list untouched.
 */
function limitResults<T>(items: T[], limit: number): T[] {
    return limit > 0 ? items.slice(0, limit) : items;
}

/**
 * Integration that searches tracks, albums and artists by requesting the
 * backend's search endpoint for this integration and scraping the returned page.
 */
export default class YoutubeMusicWebScraper extends Integration {
    integrationId: number;

    /**
     * @param integrationId Id used to address this integration's backend endpoint.
     */
    constructor(integrationId: number) {
        super(integrationId);
        this.integrationId = integrationId;
    }

    /** Lists the features this integration supports. */
    getFeatures(): IntegrationFeature[] {
        return [
            IntegrationFeature.Test,
            IntegrationFeature.SearchTrack,
            IntegrationFeature.SearchAlbum,
            IntegrationFeature.SearchArtist,
        ];
    }

    /** Renders the store icon, forwarding any props to it. */
    getIcon(props: any) {
        return <StoreLinkIcon whichStore={IntegrationWith.YoutubeMusic} {...props} />;
    }

    /** Identifies the store this integration produces links for. */
    providesStoreLink() {
        return IntegrationWith.YoutubeMusic;
    }

    /**
     * Self-test: searches for a well-known song and checks that it comes back
     * as the first track.
     *
     * @throws Error when the expected song is not the first result.
     */
    async test(testParams: {}) {
        const songs = parseTracks(
            await fetchSearchInitialData(this.integrationId, 'No One Knows Queens Of The Stone Age'));

        if (!Array.isArray(songs) || songs.length === 0 || songs[0].title !== "No One Knows") {
            throw new Error("Test failed; No One Knows was not correctly identified.");
        }
    }

    /**
     * Searches for tracks.
     *
     * @param query Free-text search query.
     * @param limit Maximum number of results to return.
     */
    async searchTrack(query: string, limit: number): Promise<IntegrationTrack[]> {
        const data = await fetchSearchInitialData(this.integrationId, query);
        return limitResults(parseTracks(data), limit);
    }

    /**
     * Searches for albums (including singles).
     *
     * @param query Free-text search query.
     * @param limit Maximum number of results to return.
     */
    async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> {
        const data = await fetchSearchInitialData(this.integrationId, query);
        return limitResults(parseAlbums(data), limit);
    }

    /**
     * Searches for artists.
     *
     * @param query Free-text search query.
     * @param limit Maximum number of results to return.
     */
    async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> {
        const data = await fetchSearchInitialData(this.integrationId, query);
        return limitResults(parseArtists(data), limit);
    }
}