Add a YouTube Music web scraper.

editsong
Sander Vocke 5 years ago
parent 9c661d67ee
commit cd9ee9bbb1
  1. 9
      client/src/api.ts
  2. 16
      client/src/assets/youtubemusic_icon.svg
  3. 6
      client/src/components/common/StoreLinkIcon.tsx
  4. 32
      client/src/components/windows/settings/IntegrationSettings.tsx
  5. 2
      client/src/components/windows/song/EditSongDialog.tsx
  6. 15
      client/src/lib/integration/useIntegrations.tsx
  7. 203
      client/src/lib/integration/youtubemusic/YoutubeMusicWebScraper.tsx
  8. 15
      server/integrations/integrations.ts

@ -360,18 +360,21 @@ export const LogoutEndpoint = "/logout";
export enum IntegrationType { export enum IntegrationType {
SpotifyClientCredentials = "SpotifyClientCredentials", SpotifyClientCredentials = "SpotifyClientCredentials",
YoutubeWebScraper = "YoutubeWebScraper",
} }
export interface SpotifyClientCredentialsDetails { export interface SpotifyClientCredentialsDetails {
clientId: string, clientId: string,
} }
export interface SpotifyClientCredentialsSecretDetails { export interface SpotifyClientCredentialsSecretDetails {
clientSecret: string, clientSecret: string,
} }
export type IntegrationDetails = SpotifyClientCredentialsDetails; export interface YoutubeMusicWebScraperDetails {}
export type IntegrationSecretDetails = SpotifyClientCredentialsSecretDetails; export interface YoutubeMusicWebScraperSecretDetails {}
export type IntegrationDetails = SpotifyClientCredentialsDetails | YoutubeMusicWebScraperDetails;
export type IntegrationSecretDetails = SpotifyClientCredentialsSecretDetails | YoutubeMusicWebScraperSecretDetails;
// Create a new integration (POST). // Create a new integration (POST).
export const CreateIntegrationEndpoint = '/integration'; export const CreateIntegrationEndpoint = '/integration';

@ -0,0 +1,16 @@
<svg version="1.1" id="Layer_1"
xmlns="http://www.w3.org/2000/svg" x="0px" y="0px" viewBox="0 0 176 176"
enable-background="new 0 0 176 176">
<!--
  Icon drawn on a 176x176 canvas: a red disc, a white ring and a white
  triangle. The editor-generated <metadata> block was dropped because it
  referenced the entity "ns_sfw", which this file never declares (there is no
  DOCTYPE), making the document ill-formed for strict XML parsers. Metadata is
  not rendered, so the drawing itself is unchanged.
-->
<g id="XMLID_167_">
<circle id="XMLID_791_" fill="#FF0000" cx="88" cy="88" r="88"/>
<path id="XMLID_42_" fill="#FFFFFF" d="M88,46c23.1,0,42,18.8,42,42s-18.8,42-42,42s-42-18.8-42-42S64.9,46,88,46 M88,42
c-25.4,0-46,20.6-46,46s20.6,46,46,46s46-20.6,46-46S113.4,42,88,42L88,42z"/>
<polygon id="XMLID_274_" fill="#FFFFFF" points="72,111 111,87 72,65 "/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 696 B

@ -1,10 +1,12 @@
import React from 'react'; import React from 'react';
import { ReactComponent as GPMIcon } from '../../assets/googleplaymusic_icon.svg'; import { ReactComponent as GPMIcon } from '../../assets/googleplaymusic_icon.svg';
import { ReactComponent as SpotifyIcon } from '../../assets/spotify_icon.svg'; import { ReactComponent as SpotifyIcon } from '../../assets/spotify_icon.svg';
import { ReactComponent as YoutubeMusicIcon } from '../../assets/youtubemusic_icon.svg';
export enum ExternalStore { export enum ExternalStore {
GooglePlayMusic = "Google Play Music", GooglePlayMusic = "Google Play Music",
Spotify = "Spotify", Spotify = "Spotify",
YoutubeMusic = "Youtube Music",
} }
export interface IProps { export interface IProps {
@ -16,6 +18,8 @@ export function whichStore(url: string) {
return ExternalStore.GooglePlayMusic; return ExternalStore.GooglePlayMusic;
} else if (url.includes('spotify.com')) { } else if (url.includes('spotify.com')) {
return ExternalStore.Spotify; return ExternalStore.Spotify;
} else if (url.includes('music.youtube.com')) {
return ExternalStore.YoutubeMusic;
} }
return undefined; return undefined;
} }
@ -31,6 +35,8 @@ export default function StoreLinkIcon(props: any) {
return <GPMIcon {...restProps} style={realStyle} />; return <GPMIcon {...restProps} style={realStyle} />;
case ExternalStore.Spotify: case ExternalStore.Spotify:
return <SpotifyIcon {...restProps} style={realStyle} />; return <SpotifyIcon {...restProps} style={realStyle} />;
case ExternalStore.YoutubeMusic:
return <YoutubeMusicIcon {...restProps} style={realStyle} />;
default: default:
throw new Error("Unknown external store: " + whichStore) throw new Error("Unknown external store: " + whichStore)
} }

@ -84,7 +84,16 @@ function EditIntegration(props: {
})} })}
</Box> </Box>
<Typography>Spotify (using Client Credentials)</Typography> <Typography>Spotify (using Client Credentials)</Typography>
</Box>,
[serverApi.IntegrationType.YoutubeWebScraper]:
<Box display="flex" alignItems="center">
<Box mr={1}>
{new IntegrationClasses[serverApi.IntegrationType.YoutubeWebScraper](-1).getIcon({
style: { height: '40px', width: '40px' }
})}
</Box> </Box>
<Typography>Youtube Music (using experimental web scraper)</Typography>
</Box>,
} }
let IntegrationDescription: Record<any, any> = { let IntegrationDescription: Record<any, any> = {
[serverApi.IntegrationType.SpotifyClientCredentials]: [serverApi.IntegrationType.SpotifyClientCredentials]:
@ -95,7 +104,15 @@ function EditIntegration(props: {
Please see the Spotify API documentation on how to generate a client ID Please see the Spotify API documentation on how to generate a client ID
and client secret. Once set, you will only be able to overwrite the secret and client secret. Once set, you will only be able to overwrite the secret
here, not read it. here, not read it.
</Typography> </Typography>,
[serverApi.IntegrationType.YoutubeWebScraper]:
<Typography>
This integration allows using the public Youtube Music search page to scrape
for music metadata. <br />
Because it relies on reverse-engineering of a web page that may change in the
future, this is considered to be experimental and unstable. However, the music links acquired
using this method are expected to remain reasonably stable.
</Typography>,
} }
return <Card variant="outlined"> return <Card variant="outlined">
@ -122,8 +139,9 @@ function EditIntegration(props: {
</Box> </Box>
{props.integration.type === serverApi.IntegrationType.SpotifyClientCredentials && {props.integration.type === serverApi.IntegrationType.SpotifyClientCredentials &&
<EditSpotifyClientCredentialsDetails <EditSpotifyClientCredentialsDetails
clientId={props.integration.details.clientId} clientId={'clientId' in props.integration.details &&
clientSecret={props.integration.secretDetails ? props.integration.details.clientId || ""}
clientSecret={props.integration.secretDetails && 'clientSecret' in props.integration.secretDetails ?
props.integration.secretDetails.clientSecret : props.integration.secretDetails.clientSecret :
(props.isNew ? "" : null)} (props.isNew ? "" : null)}
editing={props.editing || false} editing={props.editing || false}
@ -207,7 +225,13 @@ function AddIntegrationMenu(props: {
props.onAdd && props.onAdd(serverApi.IntegrationType.SpotifyClientCredentials); props.onAdd && props.onAdd(serverApi.IntegrationType.SpotifyClientCredentials);
props.onClose && props.onClose(); props.onClose && props.onClose();
}} }}
>Spotify</MenuItem> >Spotify via Client Credentials</MenuItem>
<MenuItem
onClick={() => {
props.onAdd && props.onAdd(serverApi.IntegrationType.YoutubeWebScraper);
props.onClose && props.onClose();
}}
>Youtube Music Web Scraper</MenuItem>
</Menu> </Menu>
} }

@ -23,7 +23,7 @@ export function ProvideLinksWidget(props: {
props.providers.length > 0 ? 0 : undefined props.providers.length > 0 ? 0 : undefined
); );
let [query, setQuery] = useState<string>( let [query, setQuery] = useState<string>(
`${props.metadata.title} ${props.metadata.artists && props.metadata.artists[0].name}` `${props.metadata.title}${props.metadata.artists && ` ${props.metadata.artists[0].name}`}${props.metadata.albums && ` ${props.metadata.albums[0].name}`}`
) )
let [results, setResults] = useState<IntegrationSong[]>([]); let [results, setResults] = useState<IntegrationSong[]>([]);

@ -5,6 +5,7 @@ import SpotifyClientCreds from "./spotify/SpotifyClientCreds";
import * as backend from "../backend/integrations"; import * as backend from "../backend/integrations";
import { handleNotLoggedIn, NotLoggedInError } from "../backend/request"; import { handleNotLoggedIn, NotLoggedInError } from "../backend/request";
import { useAuth } from "../useAuth"; import { useAuth } from "../useAuth";
import YoutubeMusicWebScraper from "./youtubemusic/YoutubeMusicWebScraper";
export type IntegrationState = { export type IntegrationState = {
id: number, id: number,
@ -27,6 +28,7 @@ export interface Integrations {
export const IntegrationClasses: Record<any, any> = { export const IntegrationClasses: Record<any, any> = {
[serverApi.IntegrationType.SpotifyClientCredentials]: SpotifyClientCreds, [serverApi.IntegrationType.SpotifyClientCredentials]: SpotifyClientCreds,
[serverApi.IntegrationType.YoutubeWebScraper]: YoutubeMusicWebScraper,
} }
export function makeDefaultIntegrationProperties(type: serverApi.IntegrationType): export function makeDefaultIntegrationProperties(type: serverApi.IntegrationType):
@ -34,12 +36,20 @@ export function makeDefaultIntegrationProperties(type: serverApi.IntegrationType
switch(type) { switch(type) {
case serverApi.IntegrationType.SpotifyClientCredentials: { case serverApi.IntegrationType.SpotifyClientCredentials: {
return { return {
name: "Spotify", name: "Spotify App",
type: type, type: type,
details: { clientId: "" }, details: { clientId: "" },
secretDetails: { clientSecret: "" }, secretDetails: { clientSecret: "" },
} }
} }
case serverApi.IntegrationType.YoutubeWebScraper: {
return {
name: "Youtube Music Web Scraper",
type: type,
details: {},
secretDetails: {},
}
}
default: { default: {
throw new Error("Unimplemented default integration.") throw new Error("Unimplemented default integration.")
} }
@ -51,6 +61,9 @@ export function makeIntegration(p: serverApi.CreateIntegrationRequest, id: numbe
case serverApi.IntegrationType.SpotifyClientCredentials: { case serverApi.IntegrationType.SpotifyClientCredentials: {
return new SpotifyClientCreds(id); return new SpotifyClientCreds(id);
} }
case serverApi.IntegrationType.YoutubeWebScraper: {
return new YoutubeMusicWebScraper(id);
}
default: { default: {
throw new Error("Unimplemented integration type.") throw new Error("Unimplemented integration type.")
} }

@ -0,0 +1,203 @@
import React from 'react';
import Integration, { IntegrationFeature, IntegrationAlbum, IntegrationArtist, IntegrationSong } from '../Integration';
import StoreLinkIcon, { ExternalStore } from '../../../components/common/StoreLinkIcon';
/**
 * Kinds of entity a search can target.
 *
 * Each member maps to a short string identifier. NOTE(review): these values
 * look like they were meant to be sent as a `type` query parameter by the
 * generic `search` method, which is currently a stub; confirm before relying
 * on the exact strings.
 */
enum SearchType {
  /** Individual songs. */
  Song = "track",
  /** Performing artists. */
  Artist = "artist",
  /** Albums / releases. */
  Album = "album",
}
/**
 * Extracts and parses the JSON payload that a scraped search page embeds in
 * an inline `initialData.push({...})` script call.
 *
 * At the time of writing, the scraper targets a block of this shape:
 *
 *     initialData.push({
 *         path: ...,
 *         params: {"query":"something"},
 *         data: "THIS",
 *     });
 *
 * and returns the parsed contents of the THIS string literal.
 *
 * @param text Raw text (HTML) of the search result page.
 * @returns The parsed payload, or `undefined` when the block cannot be found
 *          or when its contents are not valid JSON.
 */
export function extractInitialData(text: string): any | undefined {
  // Capture everything between the outermost quotes on the `data:` line.
  // `.` does not cross line breaks, so the greedy group stops at the last
  // quote on that line. A trailing comma after the closing quote is accepted
  // so that the example above (`data: "THIS",`) matches as well.
  const pattern = /initialData\.push\({[\n\r\s]*path:.*[\n\r\s]+params:\s*{\s*['"]query['"].*[\n\r\s]+data:\s*['"](.*)['"]\s*,?\s*[\n\r]/;
  const match = text.match(pattern);
  const dataLine = match ? match[1] : undefined;
  if (!dataLine) { return undefined; }

  // The payload is JSON wrapped in a script string literal, so its quotes are
  // backslash-escaped once (\") and quotes nested inside JSON strings twice
  // (\\\"). Strip one escaping level: first \" -> ", then the leftover
  // \\" -> \" so nested quotes remain valid JSON escapes.
  const unescaped = dataLine.replace(/\\"/g, '"').replace(/\\\\"/g, '\\"');

  try {
    return JSON.parse(unescaped);
  } catch (e) {
    // A page layout change can leave us with something that is not JSON;
    // report it as "no data" instead of throwing past the documented contract.
    console.log("Error parsing initial data:", e instanceof Error ? e.message : e);
    return undefined;
  }
}
/**
 * Collects every "Song"-type entry from a scraped search payload.
 *
 * The payload is walked as
 * `contents.sectionListRenderer.contents[].musicShelfRenderer.contents[]`,
 * and each `musicResponsiveListItemRenderer` whose second flex column reads
 * "Song" is converted. Columns are interpreted as: 0 = title, 1 = item type,
 * 2 = artists, 3 = albums. Only text runs that carry a browse link become
 * artists/albums; the first of each is attached to the song.
 *
 * Items that do not have the expected shape (missing columns, no video id,
 * no title) are skipped individually so that one unexpected entry does not
 * discard the remaining results.
 *
 * @param initialData Parsed payload as returned by `extractInitialData`;
 *                    may be `undefined` or arbitrarily shaped.
 * @returns The songs found, or an empty array when nothing could be parsed.
 */
export function parseSongs(initialData: any): IntegrationSong[] {
  // Text runs of the flex column at `index`, or [] when any level is missing.
  const columnRuns = (renderer: any, index: number): any[] => {
    const column = renderer && Array.isArray(renderer.flexColumns)
      ? renderer.flexColumns[index]
      : undefined;
    const columnRenderer = column && column.musicResponsiveListItemFlexColumnRenderer;
    const runs = columnRenderer && columnRenderer.text && columnRenderer.text.runs;
    return Array.isArray(runs) ? runs : [];
  };

  // Text of the first run in the given column, or undefined when absent.
  const firstText = (renderer: any, index: number): any => {
    const run = columnRuns(renderer, index)[0];
    return run ? run.text : undefined;
  };

  // Turns every run that links to a browse page into a { url, name } record;
  // plain separator runs (no link) are dropped.
  const linkedEntities = (runs: any[]): any[] => runs
    .filter((run: any) => run && run.navigationEndpoint && run.navigationEndpoint.browseEndpoint)
    .map((run: any) => ({
      url: `https://music.youtube.com/browse/${run.navigationEndpoint.browseEndpoint.browseId}`,
      name: run.text,
    }));

  try {
    const sections = initialData
      && initialData.contents
      && initialData.contents.sectionListRenderer
      && initialData.contents.sectionListRenderer.contents;
    if (!Array.isArray(sections)) { return []; }

    const songs: IntegrationSong[] = [];
    for (const section of sections) {
      const shelfItems = section && section.musicShelfRenderer && section.musicShelfRenderer.contents;
      if (!Array.isArray(shelfItems)) { continue; }

      for (const item of shelfItems) {
        const renderer = item && item.musicResponsiveListItemRenderer;
        if (!renderer || firstText(renderer, 1) !== "Song") { continue; }

        const videoId = renderer.doubleTapCommand
          && renderer.doubleTapCommand.watchEndpoint
          && renderer.doubleTapCommand.watchEndpoint.videoId;
        const title = firstText(renderer, 0);
        // Without a video id there is no link to offer; without a title the
        // entry is not usable as a search result.
        if (!videoId || typeof title !== 'string') { continue; }

        const artists = linkedEntities(columnRuns(renderer, 2));
        const albums = linkedEntities(columnRuns(renderer, 3))
          .map((album: any) => ({ ...album, artist: artists[0] }));

        songs.push({
          title: title,
          url: `https://music.youtube.com/watch?v=${videoId}`,
          artist: artists[0],
          album: albums[0],
        });
      }
    }
    return songs;
  } catch (e) {
    console.log("Error parsing songs:", e instanceof Error ? e.message : e);
    return [];
  }
}
/**
 * Integration that looks up songs by requesting a search page through the
 * backend route `/integrations/<id>/search` and scraping the embedded data
 * with `extractInitialData` and `parseSongs`.
 *
 * Only song search is implemented. Album and artist search are advertised in
 * `getFeatures` but currently always yield no results.
 */
export default class YoutubeMusicWebScraper extends Integration {
  integrationId: number;

  /**
   * @param integrationId Id of the integration; used to build the backend
   *                      request path.
   */
  constructor(integrationId: number) {
    super(integrationId);
    this.integrationId = integrationId;
  }

  /** Features this integration reports to the rest of the application. */
  getFeatures(): IntegrationFeature[] {
    // NOTE(review): SearchAlbum and SearchArtist are listed although the
    // corresponding methods below are stubs that return [].
    return [
      IntegrationFeature.Test,
      IntegrationFeature.SearchSong,
      IntegrationFeature.SearchAlbum,
      IntegrationFeature.SearchArtist,
    ]
  }

  /** Renders the store icon; `props` are forwarded to `StoreLinkIcon`. */
  getIcon(props: any) {
    return <StoreLinkIcon whichStore={ExternalStore.YoutubeMusic} {...props} />
  }

  /** The external store whose links this integration produces. */
  providesStoreLink() {
    return ExternalStore.YoutubeMusic;
  }

  /**
   * Fetches the raw search page for `query` via the backend.
   *
   * @throws Error when the backend answers with a non-2xx status, so that an
   *         error page is never handed to the scraper as if it were results.
   */
  private async fetchSearchPage(query: string): Promise<string> {
    const response = await fetch(
      (process.env.REACT_APP_BACKEND || "") +
      `/integrations/${this.integrationId}/search?q=${encodeURIComponent(query)}`);
    if (!response.ok) {
      throw new Error(`Youtube Music search request failed with HTTP status ${response.status}`);
    }
    return response.text();
  }

  /**
   * Self-test: searches for a fixed, well-known song and verifies that the
   * first scraped result carries the expected title.
   *
   * @throws Error when the request fails or the song is not identified.
   */
  async test(testParams: {}) {
    const text = await this.fetchSearchPage('No One Knows Queens Of The Stone Age');
    const songs = parseSongs(extractInitialData(text));
    if (!Array.isArray(songs) || songs.length === 0 || songs[0].title !== "No One Knows") {
      throw new Error("Test failed; No One Knows was not correctly identified.");
    }
  }

  /**
   * Searches for songs matching `query`.
   *
   * @param query Free-text search query.
   * @param limit Maximum number of songs to return. Applied after scraping;
   *              ignored when it is not a finite, non-negative number.
   * @returns The songs scraped from the search page, possibly empty.
   * @throws Error when the search request fails.
   */
  async searchSong(query: string, limit: number): Promise<IntegrationSong[]> {
    const text = await this.fetchSearchPage(query);
    const songs = parseSongs(extractInitialData(text));
    return Number.isFinite(limit) && limit >= 0 ? songs.slice(0, limit) : songs;
  }

  /** Not implemented yet: always resolves to an empty list. */
  async searchAlbum(query: string, limit: number): Promise<IntegrationAlbum[]> { return []; }

  /** Not implemented yet: always resolves to an empty list. */
  async searchArtist(query: string, limit: number): Promise<IntegrationArtist[]> { return []; }

  /**
   * Generic typed search. Not implemented for this integration: always
   * resolves to an empty list regardless of the arguments.
   */
  async search(query: string, type: SearchType, limit: number):
    Promise<IntegrationSong[] | IntegrationAlbum[] | IntegrationArtist[]> {
    return [];
  }
}

@ -47,6 +47,17 @@ export function createIntegrations(knex: Knex) {
} }
}); });
// Proxy middleware that forwards requests to https://music.youtube.com/,
// with the origin changed to the target and debug-level logging enabled.
let proxyYoutubeMusic = createProxyMiddleware({
  target: 'https://music.youtube.com/',
  changeOrigin: true,
  logLevel: 'debug',
  pathRewrite: (incomingPath: string, _req: any) => {
    console.log("Rewrite URL:", incomingPath);
    // Strip the leading integration prefix (e.g. "/integrations/5") so that
    // only the remainder of the path is sent to the target.
    const integrationPrefix = /^\/integrations\/[0-9]+/;
    return incomingPath.replace(integrationPrefix, '');
  },
});
// In the first layer, retrieve integration details and save details // In the first layer, retrieve integration details and save details
// in the request. // in the request.
return async (req: any, res: any, next: any) => { return async (req: any, res: any, next: any) => {
@ -82,6 +93,10 @@ export function createIntegrations(knex: Knex) {
req.headers["Authorization"] = "Bearer " + req._access_token; req.headers["Authorization"] = "Bearer " + req._access_token;
return proxySpotifyCC(req, res, next); return proxySpotifyCC(req, res, next);
} }
case IntegrationType.YoutubeWebScraper: {
console.log("Integration: ", req._integration)
return proxyYoutubeMusic(req, res, next);
}
default: { default: {
res.status(500).send({ reason: "Unsupported integration type " + req._integration.type }) res.status(500).send({ reason: "Unsupported integration type " + req._integration.type })
} }

Loading…
Cancel
Save